From a429dfc167258cf023b2e4c20874a228298372e6 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Fri, 10 Jan 2025 00:59:21 +0900 Subject: [PATCH 01/79] [Driver][SPIR-V] Use consistent tools to convert between text and binary form (#120266) Currently we produce SPIR-V text with `spirv-dis` but assemble it with `llvm-spirv`. The SPIR-V text format is different between the tools so the assemble fails. Use `spirv-as` for assembly as it uses the same format. --------- Signed-off-by: Sarnie, Nick --- clang/lib/Driver/ToolChains/SPIRV.cpp | 49 +++++++++++++++++++++++++-- clang/lib/Driver/ToolChains/SPIRV.h | 19 +++++++++++ clang/test/Driver/spirv-toolchain.cl | 4 +-- 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/clang/lib/Driver/ToolChains/SPIRV.cpp b/clang/lib/Driver/ToolChains/SPIRV.cpp index 659da5c7f25aa..5a7894f5435fc 100644 --- a/clang/lib/Driver/ToolChains/SPIRV.cpp +++ b/clang/lib/Driver/ToolChains/SPIRV.cpp @@ -26,8 +26,8 @@ void SPIRV::constructTranslateCommand(Compilation &C, const Tool &T, llvm::opt::ArgStringList CmdArgs(Args); CmdArgs.push_back(Input.getFilename()); - if (Input.getType() == types::TY_PP_Asm) - CmdArgs.push_back("-to-binary"); + assert(Input.getType() != types::TY_PP_Asm && "Unexpected input type"); + if (Output.getType() == types::TY_PP_Asm) CmdArgs.push_back("--spirv-tools-dis"); @@ -46,6 +46,31 @@ void SPIRV::constructTranslateCommand(Compilation &C, const Tool &T, Exec, CmdArgs, Input, Output)); } +void SPIRV::constructAssembleCommand(Compilation &C, const Tool &T, + const JobAction &JA, + const InputInfo &Output, + const InputInfo &Input, + const llvm::opt::ArgStringList &Args) { + llvm::opt::ArgStringList CmdArgs(Args); + CmdArgs.push_back(Input.getFilename()); + + assert(Input.getType() == types::TY_PP_Asm && "Unexpected input type"); + + CmdArgs.append({"-o", Output.getFilename()}); + + // Try to find "spirv-as-". Otherwise, fall back to + // plain "spirv-as". + using namespace std::string_literals; + auto VersionedTool = "spirv-as-"s + std::to_string(LLVM_VERSION_MAJOR); + std::string ExeCand = T.getToolChain().GetProgramPath(VersionedTool.c_str()); + if (!llvm::sys::fs::can_execute(ExeCand)) + ExeCand = T.getToolChain().GetProgramPath("spirv-as"); + + const char *Exec = C.getArgs().MakeArgString(ExeCand); + C.addCommand(std::make_unique(JA, T, ResponseFileSupport::None(), + Exec, CmdArgs, Input, Output)); +} + void SPIRV::Translator::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -57,12 +82,29 @@ void SPIRV::Translator::ConstructJob(Compilation &C, const JobAction &JA, constructTranslateCommand(C, *this, JA, Output, Inputs[0], {}); } +void SPIRV::Assembler::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *AssembleOutput) const { + claimNoWarnArgs(Args); + if (Inputs.size() != 1) + llvm_unreachable("Invalid number of input files."); + constructAssembleCommand(C, *this, JA, Output, Inputs[0], {}); +} + clang::driver::Tool *SPIRVToolChain::getTranslator() const { if (!Translator) Translator = std::make_unique(*this); return Translator.get(); } +clang::driver::Tool *SPIRVToolChain::getAssembler() const { + if (!Assembler) + Assembler = std::make_unique(*this); + return Assembler.get(); +} + clang::driver::Tool *SPIRVToolChain::SelectTool(const JobAction &JA) const { Action::ActionClass AC = JA.getKind(); return SPIRVToolChain::getTool(AC); @@ -73,8 +115,9 @@ clang::driver::Tool *SPIRVToolChain::getTool(Action::ActionClass AC) const { default: break; case Action::BackendJobClass: - case Action::AssembleJobClass: return SPIRVToolChain::getTranslator(); + case Action::AssembleJobClass: + return SPIRVToolChain::getAssembler(); } return ToolChain::getTool(AC); } diff --git a/clang/lib/Driver/ToolChains/SPIRV.h b/clang/lib/Driver/ToolChains/SPIRV.h index 415f639bba3ec..44187084e34ec 100644 --- a/clang/lib/Driver/ToolChains/SPIRV.h +++ b/clang/lib/Driver/ToolChains/SPIRV.h @@ -22,6 +22,11 @@ void constructTranslateCommand(Compilation &C, const Tool &T, const InputInfo &Input, const llvm::opt::ArgStringList &Args); +void constructAssembleCommand(Compilation &C, const Tool &T, + const JobAction &JA, const InputInfo &Output, + const InputInfo &Input, + const llvm::opt::ArgStringList &Args); + class LLVM_LIBRARY_VISIBILITY Translator : public Tool { public: Translator(const ToolChain &TC) @@ -47,6 +52,17 @@ class LLVM_LIBRARY_VISIBILITY Linker final : public Tool { const char *LinkingOutput) const override; }; +class LLVM_LIBRARY_VISIBILITY Assembler final : public Tool { +public: + Assembler(const ToolChain &TC) : Tool("SPIRV::Assembler", "spirv-as", TC) {} + bool hasIntegratedAssembler() const override { return false; } + bool hasIntegratedCPP() const override { return false; } + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *AssembleOutput) const override; +}; + } // namespace SPIRV } // namespace tools @@ -54,6 +70,7 @@ namespace toolchains { class LLVM_LIBRARY_VISIBILITY SPIRVToolChain : public ToolChain { mutable std::unique_ptr Translator; + mutable std::unique_ptr Assembler; public: SPIRVToolChain(const Driver &D, const llvm::Triple &Triple, @@ -81,6 +98,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRVToolChain : public ToolChain { private: clang::driver::Tool *getTranslator() const; + clang::driver::Tool *getAssembler() const; + bool NativeLLVMSupport; }; diff --git a/clang/test/Driver/spirv-toolchain.cl b/clang/test/Driver/spirv-toolchain.cl index eff02f809ce83..33c7bc0a63adf 100644 --- a/clang/test/Driver/spirv-toolchain.cl +++ b/clang/test/Driver/spirv-toolchain.cl @@ -43,7 +43,7 @@ // Check assembly input -> object output // RUN: %clang -### --target=spirv64 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s // RUN: %clang -### --target=spirv32 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s -// ASM: {{llvm-spirv.*"}} {{".*"}} "-to-binary" "-o" {{".*o"}} +// ASM: {{spirv-as.*"}} {{".*"}} "-o" {{".*o"}} //----------------------------------------------------------------------------- // Check --save-temps. @@ -56,7 +56,7 @@ // TMP-SAME: "-o" [[BC:".*bc"]] // TMP-SAME: [[I]] // TMP: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" [[S:".*s"]] -// TMP: {{llvm-spirv.*"}} [[S]] "-to-binary" "-o" {{".*o"}} +// TMP: {{spirv-as.*"}} [[S]] "-o" {{".*o"}} //----------------------------------------------------------------------------- // Check linking when multiple input files are passed. From 762f1b17b2815ccdfb4e5cb5412cb6210db92f73 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Thu, 9 Jan 2025 08:00:05 -0800 Subject: [PATCH 02/79] [HLSL] Make fast math the default for HLSL (#119820) Make fast math the default for HLSL Repair test cases broken by this change. Closes #108597 --- clang/include/clang/Driver/Options.td | 2 +- clang/test/CodeGenHLSL/ArrayTemporary.hlsl | 4 +- .../BasicFeatures/OutputArguments.hlsl | 4 +- .../standard_conversion_sequences.hlsl | 6 +- .../CodeGenHLSL/builtins/ScalarSwizzles.hlsl | 2 +- .../StructuredBuffers-methods-lib.hlsl | 6 +- .../StructuredBuffers-methods-ps.hlsl | 2 +- .../CodeGenHLSL/builtins/WaveReadLaneAt.hlsl | 12 +-- clang/test/CodeGenHLSL/builtins/abs.hlsl | 64 ++++++------- clang/test/CodeGenHLSL/builtins/acos.hlsl | 24 ++--- clang/test/CodeGenHLSL/builtins/asdouble.hlsl | 4 +- clang/test/CodeGenHLSL/builtins/asin.hlsl | 24 ++--- clang/test/CodeGenHLSL/builtins/atan.hlsl | 24 ++--- clang/test/CodeGenHLSL/builtins/atan2.hlsl | 24 ++--- clang/test/CodeGenHLSL/builtins/ceil.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/clamp.hlsl | 72 +++++++------- clang/test/CodeGenHLSL/builtins/clip.hlsl | 8 +- clang/test/CodeGenHLSL/builtins/cos.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/cosh.hlsl | 24 ++--- clang/test/CodeGenHLSL/builtins/cross.hlsl | 14 +-- clang/test/CodeGenHLSL/builtins/degrees.hlsl | 32 +++---- .../CodeGenHLSL/builtins/dot-builtin.hlsl | 16 ++-- clang/test/CodeGenHLSL/builtins/dot.hlsl | 26 ++--- clang/test/CodeGenHLSL/builtins/exp.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/exp2.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/floor.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/fmod.hlsl | 24 ++--- clang/test/CodeGenHLSL/builtins/frac.hlsl | 32 +++---- clang/test/CodeGenHLSL/builtins/length.hlsl | 40 ++++---- .../CodeGenHLSL/builtins/lerp-builtin.hlsl | 4 +- clang/test/CodeGenHLSL/builtins/lerp.hlsl | 32 +++---- clang/test/CodeGenHLSL/builtins/log.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/log10.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/log2.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/mad.hlsl | 32 +++---- clang/test/CodeGenHLSL/builtins/max.hlsl | 64 ++++++------- clang/test/CodeGenHLSL/builtins/min.hlsl | 64 ++++++------- .../test/CodeGenHLSL/builtins/normalize.hlsl | 32 +++---- clang/test/CodeGenHLSL/builtins/pow.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/radians.hlsl | 32 +++---- clang/test/CodeGenHLSL/builtins/rcp.hlsl | 96 +++++++++---------- clang/test/CodeGenHLSL/builtins/round.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/rsqrt.hlsl | 32 +++---- clang/test/CodeGenHLSL/builtins/saturate.hlsl | 32 +++---- clang/test/CodeGenHLSL/builtins/sin.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/sinh.hlsl | 24 ++--- clang/test/CodeGenHLSL/builtins/sqrt.hlsl | 48 +++++----- clang/test/CodeGenHLSL/builtins/step.hlsl | 32 +++---- clang/test/CodeGenHLSL/builtins/tan.hlsl | 24 ++--- clang/test/CodeGenHLSL/builtins/tanh.hlsl | 24 ++--- clang/test/CodeGenHLSL/builtins/trunc.hlsl | 48 +++++----- clang/test/CodeGenHLSL/float3.hlsl | 2 +- clang/test/CodeGenHLSL/this-reference.hlsl | 2 +- 53 files changed, 821 insertions(+), 821 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 52823430919de..eb860f73121fd 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2285,7 +2285,7 @@ def ffp_exception_behavior_EQ : Joined<["-"], "ffp-exception-behavior=">, Group< NormalizedValues<["FPE_Ignore", "FPE_MayTrap", "FPE_Strict"]>, MarshallingInfoEnum, "FPE_Default">; defm fast_math : BoolFOption<"fast-math", - LangOpts<"FastMath">, DefaultFalse, + LangOpts<"FastMath">, Default, PosFlag, diff --git a/clang/test/CodeGenHLSL/ArrayTemporary.hlsl b/clang/test/CodeGenHLSL/ArrayTemporary.hlsl index 7d77c0aff736c..e5db7eac37a42 100644 --- a/clang/test/CodeGenHLSL/ArrayTemporary.hlsl +++ b/clang/test/CodeGenHLSL/ArrayTemporary.hlsl @@ -90,11 +90,11 @@ void template_call(float FA2[2], float FA4[4], int IA3[3]) { // CHECK: [[Addr:%.*]] = getelementptr inbounds [2 x float], ptr [[FA2]], i32 0, i32 0 // CHECK: [[Tmp:%.*]] = load float, ptr [[Addr]] -// CHECK: call void @_Z11template_fnIfEvT_(float noundef [[Tmp]]) +// CHECK: call void @_Z11template_fnIfEvT_(float noundef nofpclass(nan inf) [[Tmp]]) // CHECK: [[Idx0:%.*]] = getelementptr inbounds [2 x float], ptr [[FA2]], i32 0, i32 0 // CHECK: [[Val0:%.*]] = load float, ptr [[Idx0]] -// CHECK: [[Sum:%.*]] = fadd float [[Val0]], 5.000000e+00 +// CHECK: [[Sum:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[Val0]], 5.000000e+00 // CHECK: [[Idx1:%.*]] = getelementptr inbounds [2 x float], ptr [[FA2]], i32 0, i32 1 // CHECK: store float [[Sum]], ptr [[Idx1]] diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl index 6afead4f23366..289bbb959e8a4 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl @@ -9,7 +9,7 @@ // CHECK: define void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) {{%.*}}) void trunc_Param(inout int X) {} -// ALL-LABEL: define noundef float {{.*}}case1 +// ALL-LABEL: define noundef nofpclass(nan inf) float {{.*}}case1 // CHECK: [[F:%.*]] = alloca float // CHECK: [[ArgTmp:%.*]] = alloca i32 // CHECK: [[FVal:%.*]] = load float, ptr {{.*}} @@ -197,7 +197,7 @@ export int case7() { // CHECK: define void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) {{%.*}}) void trunc_vec(inout int3 V) {} -// ALL-LABEL: define noundef <3 x float> {{.*}}case8 +// ALL-LABEL: define noundef nofpclass(nan inf) <3 x float> {{.*}}case8 // CHECK: [[V:%.*]] = alloca <3 x float> // CHECK: [[Tmp:%.*]] = alloca <3 x i32> diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl index dd7dfd1769703..1665a0260ab05 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl @@ -6,7 +6,7 @@ // CHECK: store <3 x float> splat (float 1.000000e+00), ptr [[f3]] // CHECK: [[vecf3:%.*]] = load <3 x float>, ptr [[f3]] // CHECK: [[vecf4:%.*]] = shufflevector <3 x float> [[vecf3]], <3 x float> poison, <4 x i32> -// CHECK: [[vecd4:%.*]] = fpext <4 x float> [[vecf4]] to <4 x double> +// CHECK: [[vecd4:%.*]] = fpext reassoc nnan ninf nsz arcp afn <4 x float> [[vecf4]] to <4 x double> // CHECK: store <4 x double> [[vecd4]], ptr [[d4]] void f3_to_d4() { vector f3 = 1.0; @@ -30,7 +30,7 @@ void f3_to_f2() { // CHECK: [[f2:%.*]] = alloca <2 x float> // CHECK: store <4 x double> splat (double 3.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] -// CHECK: [[vecf4:%.*]] = fptrunc <4 x double> [[vecd4]] to <4 x float> +// CHECK: [[vecf4:%.*]] = fptrunc reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x float> // CHECK: [[vecf2:%.*]] = shufflevector <4 x float> [[vecf4]], <4 x float> poison, <2 x i32> // CHECK: store <2 x float> [[vecf2]], ptr [[f2]] void d4_to_f2() { @@ -108,7 +108,7 @@ void i2_to_b2() { // CHECK: [[b2:%.*]] = alloca i8 // CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] -// CHECK: [[vecb4:%.*]] = fcmp une <4 x double> [[vecd4]], zeroinitializer +// CHECK: [[vecb4:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <4 x double> [[vecd4]], zeroinitializer // CHECK: [[vecd2:%.*]] = shufflevector <4 x i1> [[vecb4]], <4 x i1> poison, <2 x i32> // CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecd2]], <2 x i1> poison, <8 x i32> // CHECK: [[i8:%.*]] = bitcast <8 x i1> [[vecb8]] to i8 diff --git a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl index 94a95107eea69..97711c9ee25a1 100644 --- a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl +++ b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl @@ -119,7 +119,7 @@ float2 HowManyFloats(float V) { // CHECK: store <1 x double> splat (double 1.000000e+00), ptr [[Tmp]], align 8 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[Tmp]], align 8 // CHECK: [[vec3:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <3 x i32> zeroinitializer -// CHECK: [[vec3f:%.*]] = fptrunc <3 x double> [[vec3]] to <3 x float> +// CHECK: [[vec3f:%.*]] = fptrunc reassoc nnan ninf nsz arcp afn <3 x double> [[vec3]] to <3 x float> // CHECK: ret <3 x float> [[vec3f]] float3 AllRighty() { diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl index 383f591f676d9..93aa218f63ecf 100644 --- a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl @@ -33,7 +33,7 @@ export void TestAppend(float value) { ASB.Append(value); } -// CHECK: define void @_Z10TestAppendf(float noundef %value) +// CHECK: define void @_Z10TestAppendf(float noundef nofpclass(nan inf) %value) // CHECK-DXIL: %[[VALUE:.*]] = load float, ptr %value.addr, align 4 // CHECK-DXIL: %[[INDEX:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 1) // CHECK-DXIL: %[[RESPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i32 %[[INDEX]]) @@ -43,7 +43,7 @@ export float TestConsume() { return CSB.Consume(); } -// CHECK: define noundef float @_Z11TestConsumev() +// CHECK: define noundef nofpclass(nan inf) float @_Z11TestConsumev() // CHECK-DXIL: %[[INDEX:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %1, i8 -1) // CHECK-DXIL: %[[RESPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %0, i32 %[[INDEX]]) // CHECK-DXIL: %[[VALUE:.*]] = load float, ptr %[[RESPTR]], align 4 @@ -53,7 +53,7 @@ export float TestLoad() { return RWSB1.Load(1) + SB1.Load(2); } -// CHECK: define noundef float @_Z8TestLoadv() +// CHECK: define noundef nofpclass(nan inf) float @_Z8TestLoadv() // CHECK: %[[PTR1:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i32 %{{[0-9]+}}) // CHECK: %[[VALUE1:.*]] = load float, ptr %[[PTR1]] // CHECK: %[[PTR2:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_0_0t(target("dx.RawBuffer", float, 0, 0) %{{[0-9]+}}, i32 %{{[0-9]+}}) diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl index 3d9f4f68ec7d2..5b1d8e3052eae 100644 --- a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl @@ -28,7 +28,7 @@ export float TestLoad() { return ROSB1.Load(10); } -// CHECK: define noundef float @_Z8TestLoadv() +// CHECK: define noundef nofpclass(nan inf) float @_Z8TestLoadv() // CHECK: %[[PTR1:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %{{[0-9]+}}, i32 %{{[0-9]+}}) // CHECK: %[[VALUE1:.*]] = load float, ptr %[[PTR1]] diff --git a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl index 093a199a32bdc..d56a6af26896b 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl @@ -38,8 +38,8 @@ int16_t test_int16(int16_t expr, uint idx) { // CHECK-LABEL: test_half half test_half(half expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok2:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok2]]) ] - // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY:.*]] @llvm.spv.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok2]]) ] + // CHECK-DXIL: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] @llvm.dx.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveReadLaneAt(expr, idx); } @@ -50,8 +50,8 @@ half test_half(half expr, uint idx) { // CHECK-LABEL: test_double double test_double(double expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok3:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok3]]) ] - // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY:.*]] @llvm.spv.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok3]]) ] + // CHECK-DXIL: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] @llvm.dx.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveReadLaneAt(expr, idx); } @@ -62,8 +62,8 @@ double test_double(double expr, uint idx) { // CHECK-LABEL: test_floatv4 float4 test_floatv4(float4 expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok4:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET1:.*]] = call spir_func [[TY1:.*]] @llvm.spv.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok4]]) ] - // CHECK-DXIL: %[[RET1:.*]] = call [[TY1:.*]] @llvm.dx.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) + // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok4]]) ] + // CHECK-DXIL: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY1]] %[[RET1]] return WaveReadLaneAt(expr, idx); } diff --git a/clang/test/CodeGenHLSL/builtins/abs.hlsl b/clang/test/CodeGenHLSL/builtins/abs.hlsl index 912e8a2834723..a8456a715082e 100644 --- a/clang/test/CodeGenHLSL/builtins/abs.hlsl +++ b/clang/test/CodeGenHLSL/builtins/abs.hlsl @@ -22,25 +22,25 @@ int16_t3 test_abs_int16_t3(int16_t3 p0) { return abs(p0); } int16_t4 test_abs_int16_t4(int16_t4 p0) { return abs(p0); } #endif // __HLSL_ENABLE_16_BIT -// NATIVE_HALF-LABEL: define noundef half @_Z13test_abs_half -// NATIVE_HALF: call half @llvm.fabs.f16( -// NO_HALF-LABEL: define noundef float @_Z13test_abs_half -// NO_HALF: call float @llvm.fabs.f32(float %0) +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_abs_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.fabs.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_abs_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float %0) half test_abs_half(half p0) { return abs(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_abs_half2 -// NATIVE_HALF: call <2 x half> @llvm.fabs.v2f16( -// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_abs_half2 -// NO_HALF: call <2 x float> @llvm.fabs.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_abs_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.fabs.v2f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_abs_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.fabs.v2f32( half2 test_abs_half2(half2 p0) { return abs(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_abs_half3 -// NATIVE_HALF: call <3 x half> @llvm.fabs.v3f16( -// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_abs_half3 -// NO_HALF: call <3 x float> @llvm.fabs.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_abs_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.fabs.v3f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_abs_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.fabs.v3f32( half3 test_abs_half3(half3 p0) { return abs(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_abs_half4 -// NATIVE_HALF: call <4 x half> @llvm.fabs.v4f16( -// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_abs_half4 -// NO_HALF: call <4 x float> @llvm.fabs.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_abs_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.fabs.v4f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_abs_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fabs.v4f32( half4 test_abs_half4(half4 p0) { return abs(p0); } // CHECK-LABEL: define noundef i32 @_Z12test_abs_int @@ -56,17 +56,17 @@ int3 test_abs_int3(int3 p0) { return abs(p0); } // CHECK: call <4 x i32> @llvm.abs.v4i32( int4 test_abs_int4(int4 p0) { return abs(p0); } -// CHECK-LABEL: define noundef float @_Z14test_abs_float -// CHECK: call float @llvm.fabs.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_abs_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32( float test_abs_float(float p0) { return abs(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z15test_abs_float2 -// CHECK: call <2 x float> @llvm.fabs.v2f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_abs_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.fabs.v2f32( float2 test_abs_float2(float2 p0) { return abs(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z15test_abs_float3 -// CHECK: call <3 x float> @llvm.fabs.v3f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_abs_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.fabs.v3f32( float3 test_abs_float3(float3 p0) { return abs(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z15test_abs_float4 -// CHECK: call <4 x float> @llvm.fabs.v4f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_abs_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fabs.v4f32( float4 test_abs_float4(float4 p0) { return abs(p0); } // CHECK-LABEL: define noundef i64 @_Z16test_abs_int64_t @@ -82,15 +82,15 @@ int64_t3 test_abs_int64_t3(int64_t3 p0) { return abs(p0); } // CHECK: call <4 x i64> @llvm.abs.v4i64( int64_t4 test_abs_int64_t4(int64_t4 p0) { return abs(p0); } -// CHECK-LABEL: define noundef double @_Z15test_abs_double -// CHECK: call double @llvm.fabs.f64( +// CHECK-LABEL: define noundef nofpclass(nan inf) double @_Z15test_abs_double +// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.fabs.f64( double test_abs_double(double p0) { return abs(p0); } -// CHECK-LABEL: define noundef <2 x double> @_Z16test_abs_double2 -// CHECK: call <2 x double> @llvm.fabs.v2f64( +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x double> @_Z16test_abs_double2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.fabs.v2f64( double2 test_abs_double2(double2 p0) { return abs(p0); } -// CHECK-LABEL: define noundef <3 x double> @_Z16test_abs_double3 -// CHECK: call <3 x double> @llvm.fabs.v3f64( +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x double> @_Z16test_abs_double3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.fabs.v3f64( double3 test_abs_double3(double3 p0) { return abs(p0); } -// CHECK-LABEL: define noundef <4 x double> @_Z16test_abs_double4 -// CHECK: call <4 x double> @llvm.fabs.v4f64( +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x double> @_Z16test_abs_double4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.fabs.v4f64( double4 test_abs_double4(double4 p0) { return abs(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/acos.hlsl b/clang/test/CodeGenHLSL/builtins/acos.hlsl index 78a05cadfcd43..8152339a34e87 100644 --- a/clang/test/CodeGenHLSL/builtins/acos.hlsl +++ b/clang/test/CodeGenHLSL/builtins/acos.hlsl @@ -7,53 +7,53 @@ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF // CHECK-LABEL: test_acos_half -// NATIVE_HALF: call half @llvm.acos.f16 -// NO_HALF: call float @llvm.acos.f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.acos.f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.acos.f32 half test_acos_half ( half p0 ) { return acos ( p0 ); } // CHECK-LABEL: test_acos_half2 -// NATIVE_HALF: call <2 x half> @llvm.acos.v2f16 -// NO_HALF: call <2 x float> @llvm.acos.v2f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.acos.v2f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.acos.v2f32 half2 test_acos_half2 ( half2 p0 ) { return acos ( p0 ); } // CHECK-LABEL: test_acos_half3 -// NATIVE_HALF: call <3 x half> @llvm.acos.v3f16 -// NO_HALF: call <3 x float> @llvm.acos.v3f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.acos.v3f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.acos.v3f32 half3 test_acos_half3 ( half3 p0 ) { return acos ( p0 ); } // CHECK-LABEL: test_acos_half4 -// NATIVE_HALF: call <4 x half> @llvm.acos.v4f16 -// NO_HALF: call <4 x float> @llvm.acos.v4f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.acos.v4f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.acos.v4f32 half4 test_acos_half4 ( half4 p0 ) { return acos ( p0 ); } // CHECK-LABEL: test_acos_float -// CHECK: call float @llvm.acos.f32 +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.acos.f32 float test_acos_float ( float p0 ) { return acos ( p0 ); } // CHECK-LABEL: test_acos_float2 -// CHECK: call <2 x float> @llvm.acos.v2f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.acos.v2f32 float2 test_acos_float2 ( float2 p0 ) { return acos ( p0 ); } // CHECK-LABEL: test_acos_float3 -// CHECK: call <3 x float> @llvm.acos.v3f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.acos.v3f32 float3 test_acos_float3 ( float3 p0 ) { return acos ( p0 ); } // CHECK-LABEL: test_acos_float4 -// CHECK: call <4 x float> @llvm.acos.v4f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.acos.v4f32 float4 test_acos_float4 ( float4 p0 ) { return acos ( p0 ); } diff --git a/clang/test/CodeGenHLSL/builtins/asdouble.hlsl b/clang/test/CodeGenHLSL/builtins/asdouble.hlsl index f1c31107cdcad..bfc7fc97854c5 100644 --- a/clang/test/CodeGenHLSL/builtins/asdouble.hlsl +++ b/clang/test/CodeGenHLSL/builtins/asdouble.hlsl @@ -18,7 +18,7 @@ double test_uint(uint low, uint high) { // CHECK-SPV-SAME: {{.*}} // CHECK-SPV: bitcast <2 x i32> %[[SHUFFLE0]] to double - // CHECK-DXIL: call double @llvm.dx.asdouble.i32 + // CHECK-DXIL: call reassoc nnan ninf nsz arcp afn double @llvm.dx.asdouble.i32 return asdouble(low, high); } @@ -30,7 +30,7 @@ double3 test_vuint(uint3 low, uint3 high) { // CHECK-SPV-SAME: {{.*}} // CHECK-SPV: bitcast <6 x i32> %[[SHUFFLE1]] to <3 x double> - // CHECK-DXIL: call <3 x double> @llvm.dx.asdouble.v3i32 + // CHECK-DXIL: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.dx.asdouble.v3i32 return asdouble(low, high); } diff --git a/clang/test/CodeGenHLSL/builtins/asin.hlsl b/clang/test/CodeGenHLSL/builtins/asin.hlsl index 1d0b457e336f5..16efbba79670e 100644 --- a/clang/test/CodeGenHLSL/builtins/asin.hlsl +++ b/clang/test/CodeGenHLSL/builtins/asin.hlsl @@ -7,53 +7,53 @@ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF // CHECK-LABEL: test_asin_half -// NATIVE_HALF: call half @llvm.asin.f16 -// NO_HALF: call float @llvm.asin.f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.asin.f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.asin.f32 half test_asin_half ( half p0 ) { return asin ( p0 ); } // CHECK-LABEL: test_asin_half2 -// NATIVE_HALF: call <2 x half> @llvm.asin.v2f16 -// NO_HALF: call <2 x float> @llvm.asin.v2f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.asin.v2f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.asin.v2f32 half2 test_asin_half2 ( half2 p0 ) { return asin ( p0 ); } // CHECK-LABEL: test_asin_half3 -// NATIVE_HALF: call <3 x half> @llvm.asin.v3f16 -// NO_HALF: call <3 x float> @llvm.asin.v3f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.asin.v3f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.asin.v3f32 half3 test_asin_half3 ( half3 p0 ) { return asin ( p0 ); } // CHECK-LABEL: test_asin_half4 -// NATIVE_HALF: call <4 x half> @llvm.asin.v4f16 -// NO_HALF: call <4 x float> @llvm.asin.v4f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.asin.v4f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.asin.v4f32 half4 test_asin_half4 ( half4 p0 ) { return asin ( p0 ); } // CHECK-LABEL: test_asin_float -// CHECK: call float @llvm.asin.f32 +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.asin.f32 float test_asin_float ( float p0 ) { return asin ( p0 ); } // CHECK-LABEL: test_asin_float2 -// CHECK: call <2 x float> @llvm.asin.v2f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.asin.v2f32 float2 test_asin_float2 ( float2 p0 ) { return asin ( p0 ); } // CHECK-LABEL: test_asin_float3 -// CHECK: call <3 x float> @llvm.asin.v3f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.asin.v3f32 float3 test_asin_float3 ( float3 p0 ) { return asin ( p0 ); } // CHECK-LABEL: test_asin_float4 -// CHECK: call <4 x float> @llvm.asin.v4f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.asin.v4f32 float4 test_asin_float4 ( float4 p0 ) { return asin ( p0 ); } diff --git a/clang/test/CodeGenHLSL/builtins/atan.hlsl b/clang/test/CodeGenHLSL/builtins/atan.hlsl index faee1227f3595..437835a863703 100644 --- a/clang/test/CodeGenHLSL/builtins/atan.hlsl +++ b/clang/test/CodeGenHLSL/builtins/atan.hlsl @@ -7,53 +7,53 @@ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF // CHECK-LABEL: test_atan_half -// NATIVE_HALF: call half @llvm.atan.f16 -// NO_HALF: call float @llvm.atan.f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.atan.f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.atan.f32 half test_atan_half ( half p0 ) { return atan ( p0 ); } // CHECK-LABEL: test_atan_half2 -// NATIVE_HALF: call <2 x half> @llvm.atan.v2f16 -// NO_HALF: call <2 x float> @llvm.atan.v2f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.atan.v2f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan.v2f32 half2 test_atan_half2 ( half2 p0 ) { return atan ( p0 ); } // CHECK-LABEL: test_atan_half3 -// NATIVE_HALF: call <3 x half> @llvm.atan.v3f16 -// NO_HALF: call <3 x float> @llvm.atan.v3f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.atan.v3f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan.v3f32 half3 test_atan_half3 ( half3 p0 ) { return atan ( p0 ); } // CHECK-LABEL: test_atan_half4 -// NATIVE_HALF: call <4 x half> @llvm.atan.v4f16 -// NO_HALF: call <4 x float> @llvm.atan.v4f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.atan.v4f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan.v4f32 half4 test_atan_half4 ( half4 p0 ) { return atan ( p0 ); } // CHECK-LABEL: test_atan_float -// CHECK: call float @llvm.atan.f32 +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.atan.f32 float test_atan_float ( float p0 ) { return atan ( p0 ); } // CHECK-LABEL: test_atan_float2 -// CHECK: call <2 x float> @llvm.atan.v2f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan.v2f32 float2 test_atan_float2 ( float2 p0 ) { return atan ( p0 ); } // CHECK-LABEL: test_atan_float3 -// CHECK: call <3 x float> @llvm.atan.v3f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan.v3f32 float3 test_atan_float3 ( float3 p0 ) { return atan ( p0 ); } // CHECK-LABEL: test_atan_float4 -// CHECK: call <4 x float> @llvm.atan.v4f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan.v4f32 float4 test_atan_float4 ( float4 p0 ) { return atan ( p0 ); } diff --git a/clang/test/CodeGenHLSL/builtins/atan2.hlsl b/clang/test/CodeGenHLSL/builtins/atan2.hlsl index 40796052e608f..53d115641e72f 100644 --- a/clang/test/CodeGenHLSL/builtins/atan2.hlsl +++ b/clang/test/CodeGenHLSL/builtins/atan2.hlsl @@ -7,53 +7,53 @@ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF // CHECK-LABEL: test_atan2_half -// NATIVE_HALF: call half @llvm.atan2.f16 -// NO_HALF: call float @llvm.atan2.f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.atan2.f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.atan2.f32 half test_atan2_half (half p0, half p1) { return atan2(p0, p1); } // CHECK-LABEL: test_atan2_half2 -// NATIVE_HALF: call <2 x half> @llvm.atan2.v2f16 -// NO_HALF: call <2 x float> @llvm.atan2.v2f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.atan2.v2f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan2.v2f32 half2 test_atan2_half2 (half2 p0, half2 p1) { return atan2(p0, p1); } // CHECK-LABEL: test_atan2_half3 -// NATIVE_HALF: call <3 x half> @llvm.atan2.v3f16 -// NO_HALF: call <3 x float> @llvm.atan2.v3f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.atan2.v3f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan2.v3f32 half3 test_atan2_half3 (half3 p0, half3 p1) { return atan2(p0, p1); } // CHECK-LABEL: test_atan2_half4 -// NATIVE_HALF: call <4 x half> @llvm.atan2.v4f16 -// NO_HALF: call <4 x float> @llvm.atan2.v4f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.atan2.v4f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan2.v4f32 half4 test_atan2_half4 (half4 p0, half4 p1) { return atan2(p0, p1); } // CHECK-LABEL: test_atan2_float -// CHECK: call float @llvm.atan2.f32 +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.atan2.f32 float test_atan2_float (float p0, float p1) { return atan2(p0, p1); } // CHECK-LABEL: test_atan2_float2 -// CHECK: call <2 x float> @llvm.atan2.v2f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan2.v2f32 float2 test_atan2_float2 (float2 p0, float2 p1) { return atan2(p0, p1); } // CHECK-LABEL: test_atan2_float3 -// CHECK: call <3 x float> @llvm.atan2.v3f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan2.v3f32 float3 test_atan2_float3 (float3 p0, float3 p1) { return atan2(p0, p1); } // CHECK-LABEL: test_atan2_float4 -// CHECK: call <4 x float> @llvm.atan2.v4f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan2.v4f32 float4 test_atan2_float4 (float4 p0, float4 p1) { return atan2(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/ceil.hlsl b/clang/test/CodeGenHLSL/builtins/ceil.hlsl index 3aa78ec0ebcca..fe0b8f8983838 100644 --- a/clang/test/CodeGenHLSL/builtins/ceil.hlsl +++ b/clang/test/CodeGenHLSL/builtins/ceil.hlsl @@ -7,36 +7,36 @@ using hlsl::ceil; -// NATIVE_HALF-LABEL: define noundef half @_Z14test_ceil_half -// NATIVE_HALF: call half @llvm.ceil.f16( -// NO_HALF-LABEL: define noundef float @_Z14test_ceil_half -// NO_HALF: call float @llvm.ceil.f32(float %0) +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z14test_ceil_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.ceil.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z14test_ceil_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.ceil.f32(float %0) half test_ceil_half(half p0) { return ceil(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z15test_ceil_half2 -// NATIVE_HALF: call <2 x half> @llvm.ceil.v2f16( -// NO_HALF-LABEL: define noundef <2 x float> @_Z15test_ceil_half2 -// NO_HALF: call <2 x float> @llvm.ceil.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z15test_ceil_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.ceil.v2f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_ceil_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.ceil.v2f32( half2 test_ceil_half2(half2 p0) { return ceil(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z15test_ceil_half3 -// NATIVE_HALF: call <3 x half> @llvm.ceil.v3f16( -// NO_HALF-LABEL: define noundef <3 x float> @_Z15test_ceil_half3 -// NO_HALF: call <3 x float> @llvm.ceil.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z15test_ceil_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.ceil.v3f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_ceil_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.ceil.v3f32( half3 test_ceil_half3(half3 p0) { return ceil(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z15test_ceil_half4 -// NATIVE_HALF: call <4 x half> @llvm.ceil.v4f16( -// NO_HALF-LABEL: define noundef <4 x float> @_Z15test_ceil_half4 -// NO_HALF: call <4 x float> @llvm.ceil.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z15test_ceil_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.ceil.v4f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_ceil_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.ceil.v4f32( half4 test_ceil_half4(half4 p0) { return ceil(p0); } -// CHECK-LABEL: define noundef float @_Z15test_ceil_float -// CHECK: call float @llvm.ceil.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z15test_ceil_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.ceil.f32( float test_ceil_float(float p0) { return ceil(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z16test_ceil_float2 -// CHECK: call <2 x float> @llvm.ceil.v2f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_ceil_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.ceil.v2f32( float2 test_ceil_float2(float2 p0) { return ceil(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z16test_ceil_float3 -// CHECK: call <3 x float> @llvm.ceil.v3f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_ceil_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.ceil.v3f32( float3 test_ceil_float3(float3 p0) { return ceil(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z16test_ceil_float4 -// CHECK: call <4 x float> @llvm.ceil.v4f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_ceil_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.ceil.v4f32( float4 test_ceil_float4(float4 p0) { return ceil(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl index 3489f3d3e2b09..d01c2a45c43c8 100644 --- a/clang/test/CodeGenHLSL/builtins/clamp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl @@ -1,19 +1,19 @@ // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ // RUN: -fnative-half-type -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DTARGET=dx -DFNATTRS=noundef +// RUN: -DTARGET=dx -DFNATTRS=noundef -DFFNATTRS="nofpclass(nan inf)" // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DTARGET=dx -DFNATTRS=noundef +// RUN: -DTARGET=dx -DFNATTRS=noundef -DFFNATTRS="nofpclass(nan inf)" // RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute %s \ // RUN: -fnative-half-type -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef" +// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef" -DFFNATTRS="nofpclass(nan inf)" // RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute %s \ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef" +// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef" -DFFNATTRS="nofpclass(nan inf)" #ifdef __HLSL_ENABLE_16_BIT // NATIVE_HALF: define [[FNATTRS]] i16 @_Z16test_clamp_short @@ -95,49 +95,49 @@ uint64_t3 test_clamp_ulong3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1 // CHECK: call <4 x i64> @llvm.[[TARGET]].uclamp.v4i64 uint64_t4 test_clamp_ulong4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); } -// NATIVE_HALF: define [[FNATTRS]] half @_Z15test_clamp_half -// NATIVE_HALF: call half @llvm.[[TARGET]].nclamp.f16( -// NO_HALF: define [[FNATTRS]] float @_Z15test_clamp_half -// NO_HALF: call float @llvm.[[TARGET]].nclamp.f32( +// NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] half @_Z15test_clamp_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].nclamp.f16( +// NO_HALF: define [[FNATTRS]] [[FFNATTRS]] float @_Z15test_clamp_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].nclamp.f32( half test_clamp_half(half p0, half p1) { return clamp(p0, p1,p1); } -// NATIVE_HALF: define [[FNATTRS]] <2 x half> @_Z16test_clamp_half2 -// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].nclamp.v2f16 -// NO_HALF: define [[FNATTRS]] <2 x float> @_Z16test_clamp_half2 -// NO_HALF: call <2 x float> @llvm.[[TARGET]].nclamp.v2f32( +// NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] <2 x half> @_Z16test_clamp_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].nclamp.v2f16 +// NO_HALF: define [[FNATTRS]] [[FFNATTRS]] <2 x float> @_Z16test_clamp_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].nclamp.v2f32( half2 test_clamp_half2(half2 p0, half2 p1) { return clamp(p0, p1,p1); } -// NATIVE_HALF: define [[FNATTRS]] <3 x half> @_Z16test_clamp_half3 -// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].nclamp.v3f16 -// NO_HALF: define [[FNATTRS]] <3 x float> @_Z16test_clamp_half3 -// NO_HALF: call <3 x float> @llvm.[[TARGET]].nclamp.v3f32( +// NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] <3 x half> @_Z16test_clamp_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].nclamp.v3f16 +// NO_HALF: define [[FNATTRS]] [[FFNATTRS]] <3 x float> @_Z16test_clamp_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].nclamp.v3f32( half3 test_clamp_half3(half3 p0, half3 p1) { return clamp(p0, p1,p1); } -// NATIVE_HALF: define [[FNATTRS]] <4 x half> @_Z16test_clamp_half4 -// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].nclamp.v4f16 -// NO_HALF: define [[FNATTRS]] <4 x float> @_Z16test_clamp_half4 -// NO_HALF: call <4 x float> @llvm.[[TARGET]].nclamp.v4f32( +// NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] <4 x half> @_Z16test_clamp_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].nclamp.v4f16 +// NO_HALF: define [[FNATTRS]] [[FFNATTRS]] <4 x float> @_Z16test_clamp_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].nclamp.v4f32( half4 test_clamp_half4(half4 p0, half4 p1) { return clamp(p0, p1,p1); } -// CHECK: define [[FNATTRS]] float @_Z16test_clamp_float -// CHECK: call float @llvm.[[TARGET]].nclamp.f32( +// CHECK: define [[FNATTRS]] [[FFNATTRS]] float @_Z16test_clamp_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].nclamp.f32( float test_clamp_float(float p0, float p1) { return clamp(p0, p1,p1); } -// CHECK: define [[FNATTRS]] <2 x float> @_Z17test_clamp_float2 -// CHECK: call <2 x float> @llvm.[[TARGET]].nclamp.v2f32 +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <2 x float> @_Z17test_clamp_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].nclamp.v2f32 float2 test_clamp_float2(float2 p0, float2 p1) { return clamp(p0, p1,p1); } -// CHECK: define [[FNATTRS]] <3 x float> @_Z17test_clamp_float3 -// CHECK: call <3 x float> @llvm.[[TARGET]].nclamp.v3f32 +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <3 x float> @_Z17test_clamp_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].nclamp.v3f32 float3 test_clamp_float3(float3 p0, float3 p1) { return clamp(p0, p1,p1); } -// CHECK: define [[FNATTRS]] <4 x float> @_Z17test_clamp_float4 -// CHECK: call <4 x float> @llvm.[[TARGET]].nclamp.v4f32 +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x float> @_Z17test_clamp_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].nclamp.v4f32 float4 test_clamp_float4(float4 p0, float4 p1) { return clamp(p0, p1,p1); } -// CHECK: define [[FNATTRS]] double @_Z17test_clamp_double -// CHECK: call double @llvm.[[TARGET]].nclamp.f64( +// CHECK: define [[FNATTRS]] [[FFNATTRS]] double @_Z17test_clamp_double +// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.[[TARGET]].nclamp.f64( double test_clamp_double(double p0, double p1) { return clamp(p0, p1,p1); } -// CHECK: define [[FNATTRS]] <2 x double> @_Z18test_clamp_double2 -// CHECK: call <2 x double> @llvm.[[TARGET]].nclamp.v2f64 +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <2 x double> @_Z18test_clamp_double2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.[[TARGET]].nclamp.v2f64 double2 test_clamp_double2(double2 p0, double2 p1) { return clamp(p0, p1,p1); } -// CHECK: define [[FNATTRS]] <3 x double> @_Z18test_clamp_double3 -// CHECK: call <3 x double> @llvm.[[TARGET]].nclamp.v3f64 +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <3 x double> @_Z18test_clamp_double3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.[[TARGET]].nclamp.v3f64 double3 test_clamp_double3(double3 p0, double3 p1) { return clamp(p0, p1,p1); } -// CHECK: define [[FNATTRS]] <4 x double> @_Z18test_clamp_double4 -// CHECK: call <4 x double> @llvm.[[TARGET]].nclamp.v4f64 +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x double> @_Z18test_clamp_double4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64 double4 test_clamp_double4(double4 p0, double4 p1) { return clamp(p0, p1,p1); } diff --git a/clang/test/CodeGenHLSL/builtins/clip.hlsl b/clang/test/CodeGenHLSL/builtins/clip.hlsl index 63843c151e3ac..5a1753766a8a1 100644 --- a/clang/test/CodeGenHLSL/builtins/clip.hlsl +++ b/clang/test/CodeGenHLSL/builtins/clip.hlsl @@ -5,13 +5,13 @@ void test_scalar(float Buf) { // CHECK: define void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]]) // CHECK: [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4 - // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00 + // CHECK-NEXT: [[FCMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt float [[LOAD]], 0.000000e+00 // CHECK-NO: call i1 @llvm.dx.any // CHECK-NEXT: call void @llvm.dx.discard(i1 [[FCMP]]) // // SPIRV: define spir_func void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]]) // SPIRV: [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4 - // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00 + // SPIRV-NEXT: [[FCMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt float [[LOAD]], 0.000000e+00 // SPIRV-NO: call i1 @llvm.spv.any // SPIRV-NEXT: br i1 [[FCMP]], label %[[LTL:.*]], label %[[ENDL:.*]] // SPIRV: [[LTL]]: ; preds = %entry @@ -23,13 +23,13 @@ void test_scalar(float Buf) { void test_vector4(float4 Buf) { // CHECK: define void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]]) // CHECK: [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16 - // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer + // CHECK-NEXT: [[FCMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt <4 x float> [[LOAD]], zeroinitializer // CHECK-NEXT: [[ANYC:%.*]] = call i1 @llvm.dx.any.v4i1(<4 x i1> [[FCMP]]) // CHECK-NEXT: call void @llvm.dx.discard(i1 [[ANYC]]) // // SPIRV: define spir_func void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]]) // SPIRV: [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16 - // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer + // SPIRV-NEXT: [[FCMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt <4 x float> [[LOAD]], zeroinitializer // SPIRV-NEXT: [[ANYC:%.*]] = call i1 @llvm.spv.any.v4i1(<4 x i1> [[FCMP]]) // SPIRV-NEXT: br i1 [[ANYC]], label %[[LTL:.*]], label %[[ENDL:.*]] // SPIRV: [[LTL]]: ; preds = %entry diff --git a/clang/test/CodeGenHLSL/builtins/cos.hlsl b/clang/test/CodeGenHLSL/builtins/cos.hlsl index 4a41a9ec4a7ca..5f993d50498bf 100644 --- a/clang/test/CodeGenHLSL/builtins/cos.hlsl +++ b/clang/test/CodeGenHLSL/builtins/cos.hlsl @@ -5,36 +5,36 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z13test_cos_half -// NATIVE_HALF: call half @llvm.cos.f16( -// NO_HALF-LABEL: define noundef float @_Z13test_cos_half -// NO_HALF: call float @llvm.cos.f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_cos_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.cos.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_cos_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.cos.f32( half test_cos_half(half p0) { return cos(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_cos_half2 -// NATIVE_HALF: call <2 x half> @llvm.cos.v2f16 -// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_cos_half2 -// NO_HALF: call <2 x float> @llvm.cos.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_cos_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.cos.v2f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_cos_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.cos.v2f32( half2 test_cos_half2(half2 p0) { return cos(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_cos_half3 -// NATIVE_HALF: call <3 x half> @llvm.cos.v3f16 -// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_cos_half3 -// NO_HALF: call <3 x float> @llvm.cos.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_cos_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.cos.v3f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_cos_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.cos.v3f32( half3 test_cos_half3(half3 p0) { return cos(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_cos_half4 -// NATIVE_HALF: call <4 x half> @llvm.cos.v4f16 -// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_cos_half4 -// NO_HALF: call <4 x float> @llvm.cos.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_cos_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.cos.v4f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_cos_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.cos.v4f32( half4 test_cos_half4(half4 p0) { return cos(p0); } -// CHECK-LABEL: define noundef float @_Z14test_cos_float -// CHECK: call float @llvm.cos.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_cos_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.cos.f32( float test_cos_float(float p0) { return cos(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z15test_cos_float2 -// CHECK: call <2 x float> @llvm.cos.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_cos_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.cos.v2f32 float2 test_cos_float2(float2 p0) { return cos(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z15test_cos_float3 -// CHECK: call <3 x float> @llvm.cos.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_cos_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.cos.v3f32 float3 test_cos_float3(float3 p0) { return cos(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z15test_cos_float4 -// CHECK: call <4 x float> @llvm.cos.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_cos_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.cos.v4f32 float4 test_cos_float4(float4 p0) { return cos(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/cosh.hlsl b/clang/test/CodeGenHLSL/builtins/cosh.hlsl index a19240497b831..07c64206412db 100644 --- a/clang/test/CodeGenHLSL/builtins/cosh.hlsl +++ b/clang/test/CodeGenHLSL/builtins/cosh.hlsl @@ -7,53 +7,53 @@ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF // CHECK-LABEL: test_cosh_half -// NATIVE_HALF: call half @llvm.cosh.f16 -// NO_HALF: call float @llvm.cosh.f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.cosh.f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.cosh.f32 half test_cosh_half ( half p0 ) { return cosh ( p0 ); } // CHECK-LABEL: test_cosh_half2 -// NATIVE_HALF: call <2 x half> @llvm.cosh.v2f16 -// NO_HALF: call <2 x float> @llvm.cosh.v2f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.cosh.v2f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.cosh.v2f32 half2 test_cosh_half2 ( half2 p0 ) { return cosh ( p0 ); } // CHECK-LABEL: test_cosh_half3 -// NATIVE_HALF: call <3 x half> @llvm.cosh.v3f16 -// NO_HALF: call <3 x float> @llvm.cosh.v3f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.cosh.v3f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.cosh.v3f32 half3 test_cosh_half3 ( half3 p0 ) { return cosh ( p0 ); } // CHECK-LABEL: test_cosh_half4 -// NATIVE_HALF: call <4 x half> @llvm.cosh.v4f16 -// NO_HALF: call <4 x float> @llvm.cosh.v4f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.cosh.v4f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.cosh.v4f32 half4 test_cosh_half4 ( half4 p0 ) { return cosh ( p0 ); } // CHECK-LABEL: test_cosh_float -// CHECK: call float @llvm.cosh.f32 +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.cosh.f32 float test_cosh_float ( float p0 ) { return cosh ( p0 ); } // CHECK-LABEL: test_cosh_float2 -// CHECK: call <2 x float> @llvm.cosh.v2f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.cosh.v2f32 float2 test_cosh_float2 ( float2 p0 ) { return cosh ( p0 ); } // CHECK-LABEL: test_cosh_float3 -// CHECK: call <3 x float> @llvm.cosh.v3f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.cosh.v3f32 float3 test_cosh_float3 ( float3 p0 ) { return cosh ( p0 ); } // CHECK-LABEL: test_cosh_float4 -// CHECK: call <4 x float> @llvm.cosh.v4f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.cosh.v4f32 float4 test_cosh_float4 ( float4 p0 ) { return cosh ( p0 ); } diff --git a/clang/test/CodeGenHLSL/builtins/cross.hlsl b/clang/test/CodeGenHLSL/builtins/cross.hlsl index 514e57d36b201..b2a1d6316787d 100644 --- a/clang/test/CodeGenHLSL/builtins/cross.hlsl +++ b/clang/test/CodeGenHLSL/builtins/cross.hlsl @@ -2,26 +2,26 @@ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // NATIVE_HALF: define [[FNATTRS]] <3 x half> @ -// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].cross.v3f16(<3 x half> +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].cross.v3f16(<3 x half> // NATIVE_HALF: ret <3 x half> %hlsl.cross // NO_HALF: define [[FNATTRS]] <3 x float> @ -// NO_HALF: call <3 x float> @llvm.[[TARGET]].cross.v3f32(<3 x float> +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].cross.v3f32(<3 x float> // NO_HALF: ret <3 x float> %hlsl.cross half3 test_cross_half3(half3 p0, half3 p1) { @@ -29,7 +29,7 @@ half3 test_cross_half3(half3 p0, half3 p1) } // CHECK: define [[FNATTRS]] <3 x float> @ -// CHECK: %hlsl.cross = call <3 x float> @llvm.[[TARGET]].cross.v3f32( +// CHECK: %hlsl.cross = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].cross.v3f32( // CHECK: ret <3 x float> %hlsl.cross float3 test_cross_float3(float3 p0, float3 p1) { diff --git a/clang/test/CodeGenHLSL/builtins/degrees.hlsl b/clang/test/CodeGenHLSL/builtins/degrees.hlsl index 9e131f4badc19..64531dd2785eb 100644 --- a/clang/test/CodeGenHLSL/builtins/degrees.hlsl +++ b/clang/test/CodeGenHLSL/builtins/degrees.hlsl @@ -2,63 +2,63 @@ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // NATIVE_HALF: define [[FNATTRS]] half @ -// NATIVE_HALF: %hlsl.degrees = call half @llvm.[[TARGET]].degrees.f16( +// NATIVE_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].degrees.f16( // NATIVE_HALF: ret half %hlsl.degrees // NO_HALF: define [[FNATTRS]] float @ -// NO_HALF: %hlsl.degrees = call float @llvm.[[TARGET]].degrees.f32( +// NO_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].degrees.f32( // NO_HALF: ret float %hlsl.degrees half test_degrees_half(half p0) { return degrees(p0); } // NATIVE_HALF: define [[FNATTRS]] <2 x half> @ -// NATIVE_HALF: %hlsl.degrees = call <2 x half> @llvm.[[TARGET]].degrees.v2f16 +// NATIVE_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].degrees.v2f16 // NATIVE_HALF: ret <2 x half> %hlsl.degrees // NO_HALF: define [[FNATTRS]] <2 x float> @ -// NO_HALF: %hlsl.degrees = call <2 x float> @llvm.[[TARGET]].degrees.v2f32( +// NO_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].degrees.v2f32( // NO_HALF: ret <2 x float> %hlsl.degrees half2 test_degrees_half2(half2 p0) { return degrees(p0); } // NATIVE_HALF: define [[FNATTRS]] <3 x half> @ -// NATIVE_HALF: %hlsl.degrees = call <3 x half> @llvm.[[TARGET]].degrees.v3f16 +// NATIVE_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].degrees.v3f16 // NATIVE_HALF: ret <3 x half> %hlsl.degrees // NO_HALF: define [[FNATTRS]] <3 x float> @ -// NO_HALF: %hlsl.degrees = call <3 x float> @llvm.[[TARGET]].degrees.v3f32( +// NO_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].degrees.v3f32( // NO_HALF: ret <3 x float> %hlsl.degrees half3 test_degrees_half3(half3 p0) { return degrees(p0); } // NATIVE_HALF: define [[FNATTRS]] <4 x half> @ -// NATIVE_HALF: %hlsl.degrees = call <4 x half> @llvm.[[TARGET]].degrees.v4f16 +// NATIVE_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].degrees.v4f16 // NATIVE_HALF: ret <4 x half> %hlsl.degrees // NO_HALF: define [[FNATTRS]] <4 x float> @ -// NO_HALF: %hlsl.degrees = call <4 x float> @llvm.[[TARGET]].degrees.v4f32( +// NO_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].degrees.v4f32( // NO_HALF: ret <4 x float> %hlsl.degrees half4 test_degrees_half4(half4 p0) { return degrees(p0); } // CHECK: define [[FNATTRS]] float @ -// CHECK: %hlsl.degrees = call float @llvm.[[TARGET]].degrees.f32( +// CHECK: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].degrees.f32( // CHECK: ret float %hlsl.degrees float test_degrees_float(float p0) { return degrees(p0); } // CHECK: define [[FNATTRS]] <2 x float> @ -// CHECK: %hlsl.degrees = call <2 x float> @llvm.[[TARGET]].degrees.v2f32 +// CHECK: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].degrees.v2f32 // CHECK: ret <2 x float> %hlsl.degrees float2 test_degrees_float2(float2 p0) { return degrees(p0); } // CHECK: define [[FNATTRS]] <3 x float> @ -// CHECK: %hlsl.degrees = call <3 x float> @llvm.[[TARGET]].degrees.v3f32 +// CHECK: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].degrees.v3f32 // CHECK: ret <3 x float> %hlsl.degrees float3 test_degrees_float3(float3 p0) { return degrees(p0); } // CHECK: define [[FNATTRS]] <4 x float> @ -// CHECK: %hlsl.degrees = call <4 x float> @llvm.[[TARGET]].degrees.v4f32 +// CHECK: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].degrees.v4f32 // CHECK: ret <4 x float> %hlsl.degrees float4 test_degrees_float4(float4 p0) { return degrees(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl index 482f089d4770f..4045169d6516c 100644 --- a/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl @@ -2,8 +2,8 @@ // CHECK-LABEL: builtin_bool_to_float_type_promotion // CHECK: %conv1 = uitofp i1 %loadedv to double -// CHECK: %hlsl.dot = fmul double %conv, %conv1 -// CHECK: %conv2 = fptrunc double %hlsl.dot to float +// CHECK: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn double %conv, %conv1 +// CHECK: %conv2 = fptrunc reassoc nnan ninf nsz arcp afn double %hlsl.dot to float // CHECK: ret float %conv2 float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) { return __builtin_hlsl_dot ( p0, p1 ); @@ -11,19 +11,19 @@ float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) { // CHECK-LABEL: builtin_bool_to_float_arg1_type_promotion // CHECK: %conv = uitofp i1 %loadedv to double -// CHECK: %conv1 = fpext float %1 to double -// CHECK: %hlsl.dot = fmul double %conv, %conv1 -// CHECK: %conv2 = fptrunc double %hlsl.dot to float +// CHECK: %conv1 = fpext reassoc nnan ninf nsz arcp afn float %1 to double +// CHECK: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn double %conv, %conv1 +// CHECK: %conv2 = fptrunc reassoc nnan ninf nsz arcp afn double %hlsl.dot to float // CHECK: ret float %conv2 float builtin_bool_to_float_arg1_type_promotion ( bool p0, float p1 ) { return __builtin_hlsl_dot ( p0, p1 ); } // CHECK-LABEL: builtin_dot_int_to_float_promotion -// CHECK: %conv = fpext float %0 to double +// CHECK: %conv = fpext reassoc nnan ninf nsz arcp afn float %0 to double // CHECK: %conv1 = sitofp i32 %1 to double -// CHECK: dot = fmul double %conv, %conv1 -// CHECK: %conv2 = fptrunc double %hlsl.dot to float +// CHECK: dot = fmul reassoc nnan ninf nsz arcp afn double %conv, %conv1 +// CHECK: %conv2 = fptrunc reassoc nnan ninf nsz arcp afn double %hlsl.dot to float // CHECK: ret float %conv2 float builtin_dot_int_to_float_promotion ( float p0, int p1 ) { return __builtin_hlsl_dot ( p0, p1 ); diff --git a/clang/test/CodeGenHLSL/builtins/dot.hlsl b/clang/test/CodeGenHLSL/builtins/dot.hlsl index 3f6be04a595e2..7064066780da6 100644 --- a/clang/test/CodeGenHLSL/builtins/dot.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot.hlsl @@ -115,46 +115,46 @@ uint16_t test_dot_ushort3(uint16_t3 p0, uint16_t3 p1) { return dot(p0, p1); } uint16_t test_dot_ushort4(uint16_t4 p0, uint16_t4 p1) { return dot(p0, p1); } #endif -// NATIVE_HALF: %hlsl.dot = fmul half +// NATIVE_HALF: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn half // NATIVE_HALF: ret half %hlsl.dot -// NO_HALF: %hlsl.dot = fmul float +// NO_HALF: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn float // NO_HALF: ret float %hlsl.dot half test_dot_half(half p0, half p1) { return dot(p0, p1); } -// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v2f16(<2 x half> +// NATIVE_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn half @llvm.[[ICF]].fdot.v2f16(<2 x half> // NATIVE_HALF: ret half %hlsl.dot -// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float> +// NO_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v2f32(<2 x float> // NO_HALF: ret float %hlsl.dot half test_dot_half2(half2 p0, half2 p1) { return dot(p0, p1); } -// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v3f16(<3 x half> +// NATIVE_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn half @llvm.[[ICF]].fdot.v3f16(<3 x half> // NATIVE_HALF: ret half %hlsl.dot -// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float> +// NO_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v3f32(<3 x float> // NO_HALF: ret float %hlsl.dot half test_dot_half3(half3 p0, half3 p1) { return dot(p0, p1); } -// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v4f16(<4 x half> +// NATIVE_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn half @llvm.[[ICF]].fdot.v4f16(<4 x half> // NATIVE_HALF: ret half %hlsl.dot -// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float> +// NO_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v4f32(<4 x float> // NO_HALF: ret float %hlsl.dot half test_dot_half4(half4 p0, half4 p1) { return dot(p0, p1); } -// CHECK: %hlsl.dot = fmul float +// CHECK: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn float // CHECK: ret float %hlsl.dot float test_dot_float(float p0, float p1) { return dot(p0, p1); } -// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float> +// CHECK: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v2f32(<2 x float> // CHECK: ret float %hlsl.dot float test_dot_float2(float2 p0, float2 p1) { return dot(p0, p1); } -// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float> +// CHECK: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v3f32(<3 x float> // CHECK: ret float %hlsl.dot float test_dot_float3(float3 p0, float3 p1) { return dot(p0, p1); } -// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float> +// CHECK: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v4f32(<4 x float> // CHECK: ret float %hlsl.dot float test_dot_float4(float4 p0, float4 p1) { return dot(p0, p1); } -// CHECK: %hlsl.dot = fmul double +// CHECK: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn double // CHECK: ret double %hlsl.dot double test_dot_double(double p0, double p1) { return dot(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/exp.hlsl b/clang/test/CodeGenHLSL/builtins/exp.hlsl index 3445cfd2e71f6..6ed40ed8f433c 100644 --- a/clang/test/CodeGenHLSL/builtins/exp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/exp.hlsl @@ -5,48 +5,48 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z13test_exp_half -// NATIVE_HALF: %elt.exp = call half @llvm.exp.f16( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_exp_half +// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16( // NATIVE_HALF: ret half %elt.exp -// NO_HALF-LABEL: define noundef float @_Z13test_exp_half -// NO_HALF: %elt.exp = call float @llvm.exp.f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_exp_half +// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( // NO_HALF: ret float %elt.exp half test_exp_half(half p0) { return exp(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_exp_half2 -// NATIVE_HALF: %elt.exp = call <2 x half> @llvm.exp.v2f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_exp_half2 +// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.exp.v2f16 // NATIVE_HALF: ret <2 x half> %elt.exp -// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_exp_half2 -// NO_HALF: %elt.exp = call <2 x float> @llvm.exp.v2f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_exp_half2 +// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32( // NO_HALF: ret <2 x float> %elt.exp half2 test_exp_half2(half2 p0) { return exp(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_exp_half3 -// NATIVE_HALF: %elt.exp = call <3 x half> @llvm.exp.v3f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_exp_half3 +// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.exp.v3f16 // NATIVE_HALF: ret <3 x half> %elt.exp -// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_exp_half3 -// NO_HALF: %elt.exp = call <3 x float> @llvm.exp.v3f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_exp_half3 +// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32( // NO_HALF: ret <3 x float> %elt.exp half3 test_exp_half3(half3 p0) { return exp(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_exp_half4 -// NATIVE_HALF: %elt.exp = call <4 x half> @llvm.exp.v4f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_exp_half4 +// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.exp.v4f16 // NATIVE_HALF: ret <4 x half> %elt.exp -// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_exp_half4 -// NO_HALF: %elt.exp = call <4 x float> @llvm.exp.v4f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_exp_half4 +// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32( // NO_HALF: ret <4 x float> %elt.exp half4 test_exp_half4(half4 p0) { return exp(p0); } -// CHECK-LABEL: define noundef float @_Z14test_exp_float -// CHECK: %elt.exp = call float @llvm.exp.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_exp_float +// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32( // CHECK: ret float %elt.exp float test_exp_float(float p0) { return exp(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z15test_exp_float2 -// CHECK: %elt.exp = call <2 x float> @llvm.exp.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_exp_float2 +// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32 // CHECK: ret <2 x float> %elt.exp float2 test_exp_float2(float2 p0) { return exp(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z15test_exp_float3 -// CHECK: %elt.exp = call <3 x float> @llvm.exp.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_exp_float3 +// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32 // CHECK: ret <3 x float> %elt.exp float3 test_exp_float3(float3 p0) { return exp(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z15test_exp_float4 -// CHECK: %elt.exp = call <4 x float> @llvm.exp.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_exp_float4 +// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32 // CHECK: ret <4 x float> %elt.exp float4 test_exp_float4(float4 p0) { return exp(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/exp2.hlsl b/clang/test/CodeGenHLSL/builtins/exp2.hlsl index 7bfc897beee16..b067427e46368 100644 --- a/clang/test/CodeGenHLSL/builtins/exp2.hlsl +++ b/clang/test/CodeGenHLSL/builtins/exp2.hlsl @@ -5,48 +5,48 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z14test_exp2_half -// NATIVE_HALF: %elt.exp2 = call half @llvm.exp2.f16( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z14test_exp2_half +// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn half @llvm.exp2.f16( // NATIVE_HALF: ret half %elt.exp2 -// NO_HALF-LABEL: define noundef float @_Z14test_exp2_half -// NO_HALF: %elt.exp2 = call float @llvm.exp2.f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z14test_exp2_half +// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( // NO_HALF: ret float %elt.exp2 half test_exp2_half(half p0) { return exp2(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z15test_exp2_half2 -// NATIVE_HALF: %elt.exp2 = call <2 x half> @llvm.exp2.v2f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z15test_exp2_half2 +// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.exp2.v2f16 // NATIVE_HALF: ret <2 x half> %elt.exp2 -// NO_HALF-LABEL: define noundef <2 x float> @_Z15test_exp2_half2 -// NO_HALF: %elt.exp2 = call <2 x float> @llvm.exp2.v2f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_exp2_half2 +// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32( // NO_HALF: ret <2 x float> %elt.exp2 half2 test_exp2_half2(half2 p0) { return exp2(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z15test_exp2_half3 -// NATIVE_HALF: %elt.exp2 = call <3 x half> @llvm.exp2.v3f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z15test_exp2_half3 +// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.exp2.v3f16 // NATIVE_HALF: ret <3 x half> %elt.exp2 -// NO_HALF-LABEL: define noundef <3 x float> @_Z15test_exp2_half3 -// NO_HALF: %elt.exp2 = call <3 x float> @llvm.exp2.v3f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_exp2_half3 +// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32( // NO_HALF: ret <3 x float> %elt.exp2 half3 test_exp2_half3(half3 p0) { return exp2(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z15test_exp2_half4 -// NATIVE_HALF: %elt.exp2 = call <4 x half> @llvm.exp2.v4f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z15test_exp2_half4 +// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.exp2.v4f16 // NATIVE_HALF: ret <4 x half> %elt.exp2 -// NO_HALF-LABEL: define noundef <4 x float> @_Z15test_exp2_half4 -// NO_HALF: %elt.exp2 = call <4 x float> @llvm.exp2.v4f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_exp2_half4 +// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32( // NO_HALF: ret <4 x float> %elt.exp2 half4 test_exp2_half4(half4 p0) { return exp2(p0); } -// CHECK-LABEL: define noundef float @_Z15test_exp2_float -// CHECK: %elt.exp2 = call float @llvm.exp2.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z15test_exp2_float +// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32( // CHECK: ret float %elt.exp2 float test_exp2_float(float p0) { return exp2(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z16test_exp2_float2 -// CHECK: %elt.exp2 = call <2 x float> @llvm.exp2.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_exp2_float2 +// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32 // CHECK: ret <2 x float> %elt.exp2 float2 test_exp2_float2(float2 p0) { return exp2(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z16test_exp2_float3 -// CHECK: %elt.exp2 = call <3 x float> @llvm.exp2.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_exp2_float3 +// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32 // CHECK: ret <3 x float> %elt.exp2 float3 test_exp2_float3(float3 p0) { return exp2(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z16test_exp2_float4 -// CHECK: %elt.exp2 = call <4 x float> @llvm.exp2.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_exp2_float4 +// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32 // CHECK: ret <4 x float> %elt.exp2 float4 test_exp2_float4(float4 p0) { return exp2(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/floor.hlsl b/clang/test/CodeGenHLSL/builtins/floor.hlsl index c2d6f1bcc335c..f610baeeefd48 100644 --- a/clang/test/CodeGenHLSL/builtins/floor.hlsl +++ b/clang/test/CodeGenHLSL/builtins/floor.hlsl @@ -7,36 +7,36 @@ using hlsl::floor; -// NATIVE_HALF-LABEL: define noundef half @_Z15test_floor_half -// NATIVE_HALF: call half @llvm.floor.f16( -// NO_HALF-LABEL: define noundef float @_Z15test_floor_half -// NO_HALF: call float @llvm.floor.f32(float %0) +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z15test_floor_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.floor.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z15test_floor_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.floor.f32(float %0) half test_floor_half(half p0) { return floor(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z16test_floor_half2 -// NATIVE_HALF: call <2 x half> @llvm.floor.v2f16( -// NO_HALF-LABEL: define noundef <2 x float> @_Z16test_floor_half2 -// NO_HALF: call <2 x float> @llvm.floor.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z16test_floor_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.floor.v2f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_floor_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.floor.v2f32( half2 test_floor_half2(half2 p0) { return floor(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z16test_floor_half3 -// NATIVE_HALF: call <3 x half> @llvm.floor.v3f16( -// NO_HALF-LABEL: define noundef <3 x float> @_Z16test_floor_half3 -// NO_HALF: call <3 x float> @llvm.floor.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z16test_floor_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.floor.v3f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_floor_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.floor.v3f32( half3 test_floor_half3(half3 p0) { return floor(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z16test_floor_half4 -// NATIVE_HALF: call <4 x half> @llvm.floor.v4f16( -// NO_HALF-LABEL: define noundef <4 x float> @_Z16test_floor_half4 -// NO_HALF: call <4 x float> @llvm.floor.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z16test_floor_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.floor.v4f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_floor_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.floor.v4f32( half4 test_floor_half4(half4 p0) { return floor(p0); } -// CHECK-LABEL: define noundef float @_Z16test_floor_float -// CHECK: call float @llvm.floor.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z16test_floor_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.floor.f32( float test_floor_float(float p0) { return floor(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z17test_floor_float2 -// CHECK: call <2 x float> @llvm.floor.v2f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z17test_floor_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.floor.v2f32( float2 test_floor_float2(float2 p0) { return floor(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z17test_floor_float3 -// CHECK: call <3 x float> @llvm.floor.v3f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z17test_floor_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.floor.v3f32( float3 test_floor_float3(float3 p0) { return floor(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z17test_floor_float4 -// CHECK: call <4 x float> @llvm.floor.v4f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z17test_floor_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.floor.v4f32( float4 test_floor_float4(float4 p0) { return floor(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/fmod.hlsl b/clang/test/CodeGenHLSL/builtins/fmod.hlsl index 708779daaa7b6..b62967114d456 100644 --- a/clang/test/CodeGenHLSL/builtins/fmod.hlsl +++ b/clang/test/CodeGenHLSL/builtins/fmod.hlsl @@ -5,7 +5,7 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: -DFNATTRS=noundef -DTYPE=half +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTYPE=half // // ---------- No Native Half support test ----------- @@ -13,7 +13,7 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s \ -// RUN: -DFNATTRS=noundef -DTYPE=float +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTYPE=float // Spirv target: @@ -23,7 +23,7 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: -DFNATTRS="spir_func noundef" -DTYPE=half +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half // // ---------- No Native Half support test ----------- @@ -31,47 +31,47 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s \ -// RUN: -DFNATTRS="spir_func noundef" -DTYPE=float +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=float // CHECK: define [[FNATTRS]] [[TYPE]] @ -// CHECK: %fmod = frem [[TYPE]] +// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn [[TYPE]] // CHECK: ret [[TYPE]] %fmod half test_fmod_half(half p0, half p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @ -// CHECK: %fmod = frem <2 x [[TYPE]]> +// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> // CHECK: ret <2 x [[TYPE]]> %fmod half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @ -// CHECK: %fmod = frem <3 x [[TYPE]]> +// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> // CHECK: ret <3 x [[TYPE]]> %fmod half3 test_fmod_half3(half3 p0, half3 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <4 x [[TYPE]]> @ -// CHECK: %fmod = frem <4 x [[TYPE]]> +// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> // CHECK: ret <4 x [[TYPE]]> %fmod half4 test_fmod_half4(half4 p0, half4 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] float @ -// CHECK: %fmod = frem float +// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn float // CHECK: ret float %fmod float test_fmod_float(float p0, float p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <2 x float> @ -// CHECK: %fmod = frem <2 x float> +// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x float> // CHECK: ret <2 x float> %fmod float2 test_fmod_float2(float2 p0, float2 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <3 x float> @ -// CHECK: %fmod = frem <3 x float> +// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x float> // CHECK: ret <3 x float> %fmod float3 test_fmod_float3(float3 p0, float3 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <4 x float> @ -// CHECK: %fmod = frem <4 x float> +// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x float> // CHECK: ret <4 x float> %fmod float4 test_fmod_float4(float4 p0, float4 p1) { return fmod(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/frac.hlsl b/clang/test/CodeGenHLSL/builtins/frac.hlsl index f0fbba978c023..7b105ce84359f 100644 --- a/clang/test/CodeGenHLSL/builtins/frac.hlsl +++ b/clang/test/CodeGenHLSL/builtins/frac.hlsl @@ -2,63 +2,63 @@ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // NATIVE_HALF: define [[FNATTRS]] half @ -// NATIVE_HALF: %hlsl.frac = call half @llvm.[[TARGET]].frac.f16( +// NATIVE_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].frac.f16( // NATIVE_HALF: ret half %hlsl.frac // NO_HALF: define [[FNATTRS]] float @ -// NO_HALF: %hlsl.frac = call float @llvm.[[TARGET]].frac.f32( +// NO_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].frac.f32( // NO_HALF: ret float %hlsl.frac half test_frac_half(half p0) { return frac(p0); } // NATIVE_HALF: define [[FNATTRS]] <2 x half> @ -// NATIVE_HALF: %hlsl.frac = call <2 x half> @llvm.[[TARGET]].frac.v2f16 +// NATIVE_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].frac.v2f16 // NATIVE_HALF: ret <2 x half> %hlsl.frac // NO_HALF: define [[FNATTRS]] <2 x float> @ -// NO_HALF: %hlsl.frac = call <2 x float> @llvm.[[TARGET]].frac.v2f32( +// NO_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].frac.v2f32( // NO_HALF: ret <2 x float> %hlsl.frac half2 test_frac_half2(half2 p0) { return frac(p0); } // NATIVE_HALF: define [[FNATTRS]] <3 x half> @ -// NATIVE_HALF: %hlsl.frac = call <3 x half> @llvm.[[TARGET]].frac.v3f16 +// NATIVE_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].frac.v3f16 // NATIVE_HALF: ret <3 x half> %hlsl.frac // NO_HALF: define [[FNATTRS]] <3 x float> @ -// NO_HALF: %hlsl.frac = call <3 x float> @llvm.[[TARGET]].frac.v3f32( +// NO_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].frac.v3f32( // NO_HALF: ret <3 x float> %hlsl.frac half3 test_frac_half3(half3 p0) { return frac(p0); } // NATIVE_HALF: define [[FNATTRS]] <4 x half> @ -// NATIVE_HALF: %hlsl.frac = call <4 x half> @llvm.[[TARGET]].frac.v4f16 +// NATIVE_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].frac.v4f16 // NATIVE_HALF: ret <4 x half> %hlsl.frac // NO_HALF: define [[FNATTRS]] <4 x float> @ -// NO_HALF: %hlsl.frac = call <4 x float> @llvm.[[TARGET]].frac.v4f32( +// NO_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].frac.v4f32( // NO_HALF: ret <4 x float> %hlsl.frac half4 test_frac_half4(half4 p0) { return frac(p0); } // CHECK: define [[FNATTRS]] float @ -// CHECK: %hlsl.frac = call float @llvm.[[TARGET]].frac.f32( +// CHECK: %hlsl.frac = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].frac.f32( // CHECK: ret float %hlsl.frac float test_frac_float(float p0) { return frac(p0); } // CHECK: define [[FNATTRS]] <2 x float> @ -// CHECK: %hlsl.frac = call <2 x float> @llvm.[[TARGET]].frac.v2f32 +// CHECK: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].frac.v2f32 // CHECK: ret <2 x float> %hlsl.frac float2 test_frac_float2(float2 p0) { return frac(p0); } // CHECK: define [[FNATTRS]] <3 x float> @ -// CHECK: %hlsl.frac = call <3 x float> @llvm.[[TARGET]].frac.v3f32 +// CHECK: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].frac.v3f32 // CHECK: ret <3 x float> %hlsl.frac float3 test_frac_float3(float3 p0) { return frac(p0); } // CHECK: define [[FNATTRS]] <4 x float> @ -// CHECK: %hlsl.frac = call <4 x float> @llvm.[[TARGET]].frac.v4f32 +// CHECK: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].frac.v4f32 // CHECK: ret <4 x float> %hlsl.frac float4 test_frac_float4(float4 p0) { return frac(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl index 1c23b0df04df9..a24f01d275440 100644 --- a/clang/test/CodeGenHLSL/builtins/length.hlsl +++ b/clang/test/CodeGenHLSL/builtins/length.hlsl @@ -6,36 +6,36 @@ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF: define noundef half @ -// NATIVE_HALF: call half @llvm.fabs.f16(half -// NO_HALF: call float @llvm.fabs.f32(float +// NATIVE_HALF: define noundef nofpclass(nan inf) half @ +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.fabs.f16(half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float // NATIVE_HALF: ret half // NO_HALF: ret float half test_length_half(half p0) { return length(p0); } -// NATIVE_HALF: define noundef half @ -// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16 -// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32( +// NATIVE_HALF: define noundef nofpclass(nan inf) half @ +// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v2f16 +// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32( // NATIVE_HALF: ret half %hlsl.length // NO_HALF: ret float %hlsl.length half test_length_half2(half2 p0) { return length(p0); } -// NATIVE_HALF: define noundef half @ -// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16 -// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32( +// NATIVE_HALF: define noundef nofpclass(nan inf) half @ +// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v3f16 +// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32( // NATIVE_HALF: ret half %hlsl.length // NO_HALF: ret float %hlsl.length half test_length_half3(half3 p0) { return length(p0); } -// NATIVE_HALF: define noundef half @ -// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16 -// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32( +// NATIVE_HALF: define noundef nofpclass(nan inf) half @ +// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v4f16 +// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32( // NATIVE_HALF: ret half %hlsl.length // NO_HALF: ret float %hlsl.length half test_length_half4(half4 p0) @@ -43,29 +43,29 @@ half test_length_half4(half4 p0) return length(p0); } -// CHECK: define noundef float @ -// CHECK: call float @llvm.fabs.f32(float +// CHECK: define noundef nofpclass(nan inf) float @ +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float // CHECK: ret float float test_length_float(float p0) { return length(p0); } -// CHECK: define noundef float @ -// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32( +// CHECK: define noundef nofpclass(nan inf) float @ +// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32( // CHECK: ret float %hlsl.length float test_length_float2(float2 p0) { return length(p0); } -// CHECK: define noundef float @ -// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32( +// CHECK: define noundef nofpclass(nan inf) float @ +// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32( // CHECK: ret float %hlsl.length float test_length_float3(float3 p0) { return length(p0); } -// CHECK: define noundef float @ -// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32( +// CHECK: define noundef nofpclass(nan inf) float @ +// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32( // CHECK: ret float %hlsl.length float test_length_float4(float4 p0) { diff --git a/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl index f9b3cbcddfb69..c98693f32c834 100644 --- a/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl +++ b/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl @@ -1,14 +1,14 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s // CHECK-LABEL: builtin_lerp_half_vector -// CHECK: %hlsl.lerp = call <3 x half> @llvm.dx.lerp.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2) +// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.dx.lerp.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2) // CHECK: ret <3 x half> %hlsl.lerp half3 builtin_lerp_half_vector (half3 p0) { return __builtin_hlsl_lerp ( p0, p0, p0 ); } // CHECK-LABEL: builtin_lerp_floar_vector -// CHECK: %hlsl.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2) +// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2) // CHECK: ret <2 x float> %hlsl.lerp float2 builtin_lerp_floar_vector ( float2 p0) { return __builtin_hlsl_lerp ( p0, p0, p0 ); diff --git a/clang/test/CodeGenHLSL/builtins/lerp.hlsl b/clang/test/CodeGenHLSL/builtins/lerp.hlsl index b11046894bd88..d7a7113de4878 100644 --- a/clang/test/CodeGenHLSL/builtins/lerp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/lerp.hlsl @@ -2,57 +2,57 @@ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv -// NATIVE_HALF: %hlsl.lerp = call half @llvm.[[TARGET]].lerp.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}) +// NATIVE_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].lerp.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}) // NATIVE_HALF: ret half %hlsl.lerp -// NO_HALF: %hlsl.lerp = call float @llvm.[[TARGET]].lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) +// NO_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) // NO_HALF: ret float %hlsl.lerp half test_lerp_half(half p0) { return lerp(p0, p0, p0); } -// NATIVE_HALF: %hlsl.lerp = call <2 x half> @llvm.[[TARGET]].lerp.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}) +// NATIVE_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].lerp.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}) // NATIVE_HALF: ret <2 x half> %hlsl.lerp -// NO_HALF: %hlsl.lerp = call <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}}) +// NO_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}}) // NO_HALF: ret <2 x float> %hlsl.lerp half2 test_lerp_half2(half2 p0) { return lerp(p0, p0, p0); } -// NATIVE_HALF: %hlsl.lerp = call <3 x half> @llvm.[[TARGET]].lerp.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}}, <3 x half> %{{.*}}) +// NATIVE_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].lerp.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}}, <3 x half> %{{.*}}) // NATIVE_HALF: ret <3 x half> %hlsl.lerp -// NO_HALF: %hlsl.lerp = call <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}}) +// NO_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}}) // NO_HALF: ret <3 x float> %hlsl.lerp half3 test_lerp_half3(half3 p0) { return lerp(p0, p0, p0); } -// NATIVE_HALF: %hlsl.lerp = call <4 x half> @llvm.[[TARGET]].lerp.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x half> %{{.*}}) +// NATIVE_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].lerp.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x half> %{{.*}}) // NATIVE_HALF: ret <4 x half> %hlsl.lerp -// NO_HALF: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) +// NO_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // NO_HALF: ret <4 x float> %hlsl.lerp half4 test_lerp_half4(half4 p0) { return lerp(p0, p0, p0); } -// CHECK: %hlsl.lerp = call float @llvm.[[TARGET]].lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) +// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) // CHECK: ret float %hlsl.lerp float test_lerp_float(float p0) { return lerp(p0, p0, p0); } -// CHECK: %hlsl.lerp = call <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}}) +// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}}) // CHECK: ret <2 x float> %hlsl.lerp float2 test_lerp_float2(float2 p0) { return lerp(p0, p0, p0); } -// CHECK: %hlsl.lerp = call <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}}) +// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}}) // CHECK: ret <3 x float> %hlsl.lerp float3 test_lerp_float3(float3 p0) { return lerp(p0, p0, p0); } -// CHECK: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) +// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK: ret <4 x float> %hlsl.lerp float4 test_lerp_float4(float4 p0) { return lerp(p0, p0, p0); } diff --git a/clang/test/CodeGenHLSL/builtins/log.hlsl b/clang/test/CodeGenHLSL/builtins/log.hlsl index 71ce502eb8c4a..e489939594a53 100644 --- a/clang/test/CodeGenHLSL/builtins/log.hlsl +++ b/clang/test/CodeGenHLSL/builtins/log.hlsl @@ -5,36 +5,36 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z13test_log_half -// NATIVE_HALF: call half @llvm.log.f16( -// NO_HALF-LABEL: define noundef float @_Z13test_log_half -// NO_HALF: call float @llvm.log.f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_log_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.log.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_log_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.log.f32( half test_log_half(half p0) { return log(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_log_half2 -// NATIVE_HALF: call <2 x half> @llvm.log.v2f16 -// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_log_half2 -// NO_HALF: call <2 x float> @llvm.log.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_log_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.log.v2f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_log_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log.v2f32( half2 test_log_half2(half2 p0) { return log(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_log_half3 -// NATIVE_HALF: call <3 x half> @llvm.log.v3f16 -// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_log_half3 -// NO_HALF: call <3 x float> @llvm.log.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_log_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.log.v3f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_log_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log.v3f32( half3 test_log_half3(half3 p0) { return log(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_log_half4 -// NATIVE_HALF: call <4 x half> @llvm.log.v4f16 -// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_log_half4 -// NO_HALF: call <4 x float> @llvm.log.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_log_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.log.v4f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_log_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log.v4f32( half4 test_log_half4(half4 p0) { return log(p0); } -// CHECK-LABEL: define noundef float @_Z14test_log_float -// CHECK: call float @llvm.log.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_log_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.log.f32( float test_log_float(float p0) { return log(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z15test_log_float2 -// CHECK: call <2 x float> @llvm.log.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_log_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log.v2f32 float2 test_log_float2(float2 p0) { return log(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z15test_log_float3 -// CHECK: call <3 x float> @llvm.log.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_log_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log.v3f32 float3 test_log_float3(float3 p0) { return log(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z15test_log_float4 -// CHECK: call <4 x float> @llvm.log.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_log_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log.v4f32 float4 test_log_float4(float4 p0) { return log(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/log10.hlsl b/clang/test/CodeGenHLSL/builtins/log10.hlsl index e15b6f5747b0a..37c8e837c45a3 100644 --- a/clang/test/CodeGenHLSL/builtins/log10.hlsl +++ b/clang/test/CodeGenHLSL/builtins/log10.hlsl @@ -5,36 +5,36 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z15test_log10_half -// NATIVE_HALF: call half @llvm.log10.f16( -// NO_HALF-LABEL: define noundef float @_Z15test_log10_half -// NO_HALF: call float @llvm.log10.f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z15test_log10_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.log10.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z15test_log10_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.log10.f32( half test_log10_half(half p0) { return log10(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z16test_log10_half2 -// NATIVE_HALF: call <2 x half> @llvm.log10.v2f16 -// NO_HALF-LABEL: define noundef <2 x float> @_Z16test_log10_half2 -// NO_HALF: call <2 x float> @llvm.log10.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z16test_log10_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.log10.v2f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_log10_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log10.v2f32( half2 test_log10_half2(half2 p0) { return log10(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z16test_log10_half3 -// NATIVE_HALF: call <3 x half> @llvm.log10.v3f16 -// NO_HALF-LABEL: define noundef <3 x float> @_Z16test_log10_half3 -// NO_HALF: call <3 x float> @llvm.log10.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z16test_log10_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.log10.v3f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_log10_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log10.v3f32( half3 test_log10_half3(half3 p0) { return log10(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z16test_log10_half4 -// NATIVE_HALF: call <4 x half> @llvm.log10.v4f16 -// NO_HALF-LABEL: define noundef <4 x float> @_Z16test_log10_half4 -// NO_HALF: call <4 x float> @llvm.log10.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z16test_log10_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.log10.v4f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_log10_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log10.v4f32( half4 test_log10_half4(half4 p0) { return log10(p0); } -// CHECK-LABEL: define noundef float @_Z16test_log10_float -// CHECK: call float @llvm.log10.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z16test_log10_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.log10.f32( float test_log10_float(float p0) { return log10(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z17test_log10_float2 -// CHECK: call <2 x float> @llvm.log10.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z17test_log10_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log10.v2f32 float2 test_log10_float2(float2 p0) { return log10(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z17test_log10_float3 -// CHECK: call <3 x float> @llvm.log10.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z17test_log10_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log10.v3f32 float3 test_log10_float3(float3 p0) { return log10(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z17test_log10_float4 -// CHECK: call <4 x float> @llvm.log10.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z17test_log10_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log10.v4f32 float4 test_log10_float4(float4 p0) { return log10(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/log2.hlsl b/clang/test/CodeGenHLSL/builtins/log2.hlsl index 575761a5f637c..5159d5bb0fa4e 100644 --- a/clang/test/CodeGenHLSL/builtins/log2.hlsl +++ b/clang/test/CodeGenHLSL/builtins/log2.hlsl @@ -5,36 +5,36 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z14test_log2_half -// NATIVE_HALF: call half @llvm.log2.f16( -// NO_HALF-LABEL: define noundef float @_Z14test_log2_half -// NO_HALF: call float @llvm.log2.f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z14test_log2_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.log2.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z14test_log2_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.log2.f32( half test_log2_half(half p0) { return log2(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z15test_log2_half2 -// NATIVE_HALF: call <2 x half> @llvm.log2.v2f16 -// NO_HALF-LABEL: define noundef <2 x float> @_Z15test_log2_half2 -// NO_HALF: call <2 x float> @llvm.log2.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z15test_log2_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.log2.v2f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_log2_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log2.v2f32( half2 test_log2_half2(half2 p0) { return log2(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z15test_log2_half3 -// NATIVE_HALF: call <3 x half> @llvm.log2.v3f16 -// NO_HALF-LABEL: define noundef <3 x float> @_Z15test_log2_half3 -// NO_HALF: call <3 x float> @llvm.log2.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z15test_log2_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.log2.v3f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_log2_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log2.v3f32( half3 test_log2_half3(half3 p0) { return log2(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z15test_log2_half4 -// NATIVE_HALF: call <4 x half> @llvm.log2.v4f16 -// NO_HALF-LABEL: define noundef <4 x float> @_Z15test_log2_half4 -// NO_HALF: call <4 x float> @llvm.log2.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z15test_log2_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.log2.v4f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_log2_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log2.v4f32( half4 test_log2_half4(half4 p0) { return log2(p0); } -// CHECK-LABEL: define noundef float @_Z15test_log2_float -// CHECK: call float @llvm.log2.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z15test_log2_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.log2.f32( float test_log2_float(float p0) { return log2(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z16test_log2_float2 -// CHECK: call <2 x float> @llvm.log2.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_log2_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log2.v2f32 float2 test_log2_float2(float2 p0) { return log2(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z16test_log2_float3 -// CHECK: call <3 x float> @llvm.log2.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_log2_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log2.v3f32 float3 test_log2_float3(float3 p0) { return log2(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z16test_log2_float4 -// CHECK: call <4 x float> @llvm.log2.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_log2_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log2.v4f32 float4 test_log2_float4(float4 p0) { return log2(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/mad.hlsl b/clang/test/CodeGenHLSL/builtins/mad.hlsl index 265a2552c80fb..e764e20748d58 100644 --- a/clang/test/CodeGenHLSL/builtins/mad.hlsl +++ b/clang/test/CodeGenHLSL/builtins/mad.hlsl @@ -67,104 +67,104 @@ int16_t4 test_mad_int16_t4(int16_t4 p0, int16_t4 p1, int16_t4 p2) { return mad(p // NATIVE_HALF: %[[p0:.*]] = load half, ptr %p0.addr, align 2 // NATIVE_HALF: %[[p1:.*]] = load half, ptr %p1.addr, align 2 // NATIVE_HALF: %[[p2:.*]] = load half, ptr %p2.addr, align 2 -// NATIVE_HALF: %hlsl.fmad = call half @llvm.fmuladd.f16(half %[[p0]], half %[[p1]], half %[[p2]]) +// NATIVE_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn half @llvm.fmuladd.f16(half %[[p0]], half %[[p1]], half %[[p2]]) // NATIVE_HALF: ret half %hlsl.fmad // NO_HALF: %[[p0:.*]] = load float, ptr %p0.addr, align 4 // NO_HALF: %[[p1:.*]] = load float, ptr %p1.addr, align 4 // NO_HALF: %[[p2:.*]] = load float, ptr %p2.addr, align 4 -// NO_HALF: %hlsl.fmad = call float @llvm.fmuladd.f32(float %[[p0]], float %[[p1]], float %[[p2]]) +// NO_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn float @llvm.fmuladd.f32(float %[[p0]], float %[[p1]], float %[[p2]]) // NO_HALF: ret float %hlsl.fmad half test_mad_half(half p0, half p1, half p2) { return mad(p0, p1, p2); } // NATIVE_HALF: %[[p0:.*]] = load <2 x half>, ptr %p0.addr, align 4 // NATIVE_HALF: %[[p1:.*]] = load <2 x half>, ptr %p1.addr, align 4 // NATIVE_HALF: %[[p2:.*]] = load <2 x half>, ptr %p2.addr, align 4 -// NATIVE_HALF: %hlsl.fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %[[p0]], <2 x half> %[[p1]], <2 x half> %[[p2]]) +// NATIVE_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.fmuladd.v2f16(<2 x half> %[[p0]], <2 x half> %[[p1]], <2 x half> %[[p2]]) // NATIVE_HALF: ret <2 x half> %hlsl.fmad // NO_HALF: %[[p0:.*]] = load <2 x float>, ptr %p0.addr, align 8 // NO_HALF: %[[p1:.*]] = load <2 x float>, ptr %p1.addr, align 8 // NO_HALF: %[[p2:.*]] = load <2 x float>, ptr %p2.addr, align 8 -// NO_HALF: %hlsl.fmad = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %[[p0]], <2 x float> %[[p1]], <2 x float> %[[p2]]) +// NO_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.fmuladd.v2f32(<2 x float> %[[p0]], <2 x float> %[[p1]], <2 x float> %[[p2]]) // NO_HALF: ret <2 x float> %hlsl.fmad half2 test_mad_half2(half2 p0, half2 p1, half2 p2) { return mad(p0, p1, p2); } // NATIVE_HALF: %[[p0:.*]] = load <3 x half>, ptr %p0.addr, align 8 // NATIVE_HALF: %[[p1:.*]] = load <3 x half>, ptr %p1.addr, align 8 // NATIVE_HALF: %[[p2:.*]] = load <3 x half>, ptr %p2.addr, align 8 -// NATIVE_HALF: %hlsl.fmad = call <3 x half> @llvm.fmuladd.v3f16(<3 x half> %[[p0]], <3 x half> %[[p1]], <3 x half> %[[p2]]) +// NATIVE_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.fmuladd.v3f16(<3 x half> %[[p0]], <3 x half> %[[p1]], <3 x half> %[[p2]]) // NATIVE_HALF: ret <3 x half> %hlsl.fmad // NO_HALF: %[[p0:.*]] = load <3 x float>, ptr %p0.addr, align 16 // NO_HALF: %[[p1:.*]] = load <3 x float>, ptr %p1.addr, align 16 // NO_HALF: %[[p2:.*]] = load <3 x float>, ptr %p2.addr, align 16 -// NO_HALF: %hlsl.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %[[p0]], <3 x float> %[[p1]], <3 x float> %[[p2]]) +// NO_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.fmuladd.v3f32(<3 x float> %[[p0]], <3 x float> %[[p1]], <3 x float> %[[p2]]) // NO_HALF: ret <3 x float> %hlsl.fmad half3 test_mad_half3(half3 p0, half3 p1, half3 p2) { return mad(p0, p1, p2); } // NATIVE_HALF: %[[p0:.*]] = load <4 x half>, ptr %p0.addr, align 8 // NATIVE_HALF: %[[p1:.*]] = load <4 x half>, ptr %p1.addr, align 8 // NATIVE_HALF: %[[p2:.*]] = load <4 x half>, ptr %p2.addr, align 8 -// NATIVE_HALF: %hlsl.fmad = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> %[[p0]], <4 x half> %[[p1]], <4 x half> %[[p2]]) +// NATIVE_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.fmuladd.v4f16(<4 x half> %[[p0]], <4 x half> %[[p1]], <4 x half> %[[p2]]) // NATIVE_HALF: ret <4 x half> %hlsl.fmad // NO_HALF: %[[p0:.*]] = load <4 x float>, ptr %p0.addr, align 16 // NO_HALF: %[[p1:.*]] = load <4 x float>, ptr %p1.addr, align 16 // NO_HALF: %[[p2:.*]] = load <4 x float>, ptr %p2.addr, align 16 -// NO_HALF: %hlsl.fmad = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %[[p0]], <4 x float> %[[p1]], <4 x float> %[[p2]]) +// NO_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fmuladd.v4f32(<4 x float> %[[p0]], <4 x float> %[[p1]], <4 x float> %[[p2]]) // NO_HALF: ret <4 x float> %hlsl.fmad half4 test_mad_half4(half4 p0, half4 p1, half4 p2) { return mad(p0, p1, p2); } // CHECK: %[[p0:.*]] = load float, ptr %p0.addr, align 4 // CHECK: %[[p1:.*]] = load float, ptr %p1.addr, align 4 // CHECK: %[[p2:.*]] = load float, ptr %p2.addr, align 4 -// CHECK: %hlsl.fmad = call float @llvm.fmuladd.f32(float %[[p0]], float %[[p1]], float %[[p2]]) +// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn float @llvm.fmuladd.f32(float %[[p0]], float %[[p1]], float %[[p2]]) // CHECK: ret float %hlsl.fmad float test_mad_float(float p0, float p1, float p2) { return mad(p0, p1, p2); } // CHECK: %[[p0:.*]] = load <2 x float>, ptr %p0.addr, align 8 // CHECK: %[[p1:.*]] = load <2 x float>, ptr %p1.addr, align 8 // CHECK: %[[p2:.*]] = load <2 x float>, ptr %p2.addr, align 8 -// CHECK: %hlsl.fmad = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %[[p0]], <2 x float> %[[p1]], <2 x float> %[[p2]]) +// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.fmuladd.v2f32(<2 x float> %[[p0]], <2 x float> %[[p1]], <2 x float> %[[p2]]) // CHECK: ret <2 x float> %hlsl.fmad float2 test_mad_float2(float2 p0, float2 p1, float2 p2) { return mad(p0, p1, p2); } // CHECK: %[[p0:.*]] = load <3 x float>, ptr %p0.addr, align 16 // CHECK: %[[p1:.*]] = load <3 x float>, ptr %p1.addr, align 16 // CHECK: %[[p2:.*]] = load <3 x float>, ptr %p2.addr, align 16 -// CHECK: %hlsl.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %[[p0]], <3 x float> %[[p1]], <3 x float> %[[p2]]) +// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.fmuladd.v3f32(<3 x float> %[[p0]], <3 x float> %[[p1]], <3 x float> %[[p2]]) // CHECK: ret <3 x float> %hlsl.fmad float3 test_mad_float3(float3 p0, float3 p1, float3 p2) { return mad(p0, p1, p2); } // CHECK: %[[p0:.*]] = load <4 x float>, ptr %p0.addr, align 16 // CHECK: %[[p1:.*]] = load <4 x float>, ptr %p1.addr, align 16 // CHECK: %[[p2:.*]] = load <4 x float>, ptr %p2.addr, align 16 -// CHECK: %hlsl.fmad = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %[[p0]], <4 x float> %[[p1]], <4 x float> %[[p2]]) +// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fmuladd.v4f32(<4 x float> %[[p0]], <4 x float> %[[p1]], <4 x float> %[[p2]]) // CHECK: ret <4 x float> %hlsl.fmad float4 test_mad_float4(float4 p0, float4 p1, float4 p2) { return mad(p0, p1, p2); } // CHECK: %[[p0:.*]] = load double, ptr %p0.addr, align 8 // CHECK: %[[p1:.*]] = load double, ptr %p1.addr, align 8 // CHECK: %[[p2:.*]] = load double, ptr %p2.addr, align 8 -// CHECK: %hlsl.fmad = call double @llvm.fmuladd.f64(double %[[p0]], double %[[p1]], double %[[p2]]) +// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn double @llvm.fmuladd.f64(double %[[p0]], double %[[p1]], double %[[p2]]) // CHECK: ret double %hlsl.fmad double test_mad_double(double p0, double p1, double p2) { return mad(p0, p1, p2); } // CHECK: %[[p0:.*]] = load <2 x double>, ptr %p0.addr, align 16 // CHECK: %[[p1:.*]] = load <2 x double>, ptr %p1.addr, align 16 // CHECK: %[[p2:.*]] = load <2 x double>, ptr %p2.addr, align 16 -// CHECK: %hlsl.fmad = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %[[p0]], <2 x double> %[[p1]], <2 x double> %[[p2]]) +// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.fmuladd.v2f64(<2 x double> %[[p0]], <2 x double> %[[p1]], <2 x double> %[[p2]]) // CHECK: ret <2 x double> %hlsl.fmad double2 test_mad_double2(double2 p0, double2 p1, double2 p2) { return mad(p0, p1, p2); } // CHECK: %[[p0:.*]] = load <3 x double>, ptr %p0.addr, align 32 // CHECK: %[[p1:.*]] = load <3 x double>, ptr %p1.addr, align 32 // CHECK: %[[p2:.*]] = load <3 x double>, ptr %p2.addr, align 32 -// CHECK: %hlsl.fmad = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> %[[p0]], <3 x double> %[[p1]], <3 x double> %[[p2]]) +// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.fmuladd.v3f64(<3 x double> %[[p0]], <3 x double> %[[p1]], <3 x double> %[[p2]]) // CHECK: ret <3 x double> %hlsl.fmad double3 test_mad_double3(double3 p0, double3 p1, double3 p2) { return mad(p0, p1, p2); } // CHECK: %[[p0:.*]] = load <4 x double>, ptr %p0.addr, align 32 // CHECK: %[[p1:.*]] = load <4 x double>, ptr %p1.addr, align 32 // CHECK: %[[p2:.*]] = load <4 x double>, ptr %p2.addr, align 32 -// CHECK: %hlsl.fmad = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %[[p0]], <4 x double> %[[p1]], <4 x double> %[[p2]]) +// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.fmuladd.v4f64(<4 x double> %[[p0]], <4 x double> %[[p1]], <4 x double> %[[p2]]) // CHECK: ret <4 x double> %hlsl.fmad double4 test_mad_double4(double4 p0, double4 p1, double4 p2) { return mad(p0, p1, p2); } diff --git a/clang/test/CodeGenHLSL/builtins/max.hlsl b/clang/test/CodeGenHLSL/builtins/max.hlsl index d462fda2ccb09..0b767335556ee 100644 --- a/clang/test/CodeGenHLSL/builtins/max.hlsl +++ b/clang/test/CodeGenHLSL/builtins/max.hlsl @@ -85,49 +85,49 @@ uint64_t3 test_max_ulong3(uint64_t3 p0, uint64_t3 p1) { return max(p0, p1); } // CHECK: call <4 x i64> @llvm.umax.v4i64 uint64_t4 test_max_ulong4(uint64_t4 p0, uint64_t4 p1) { return max(p0, p1); } -// NATIVE_HALF-LABEL: define noundef half @_Z13test_max_half -// NATIVE_HALF: call half @llvm.maxnum.f16( -// NO_HALF-LABEL: define noundef float @_Z13test_max_half -// NO_HALF: call float @llvm.maxnum.f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_max_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.maxnum.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_max_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.maxnum.f32( half test_max_half(half p0, half p1) { return max(p0, p1); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_max_half2 -// NATIVE_HALF: call <2 x half> @llvm.maxnum.v2f16 -// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_max_half2 -// NO_HALF: call <2 x float> @llvm.maxnum.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_max_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.maxnum.v2f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_max_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.maxnum.v2f32( half2 test_max_half2(half2 p0, half2 p1) { return max(p0, p1); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_max_half3 -// NATIVE_HALF: call <3 x half> @llvm.maxnum.v3f16 -// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_max_half3 -// NO_HALF: call <3 x float> @llvm.maxnum.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_max_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.maxnum.v3f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_max_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.maxnum.v3f32( half3 test_max_half3(half3 p0, half3 p1) { return max(p0, p1); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_max_half4 -// NATIVE_HALF: call <4 x half> @llvm.maxnum.v4f16 -// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_max_half4 -// NO_HALF: call <4 x float> @llvm.maxnum.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_max_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.maxnum.v4f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_max_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.maxnum.v4f32( half4 test_max_half4(half4 p0, half4 p1) { return max(p0, p1); } -// CHECK-LABEL: define noundef float @_Z14test_max_float -// CHECK: call float @llvm.maxnum.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_max_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.maxnum.f32( float test_max_float(float p0, float p1) { return max(p0, p1); } -// CHECK-LABEL: define noundef <2 x float> @_Z15test_max_float2 -// CHECK: call <2 x float> @llvm.maxnum.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_max_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.maxnum.v2f32 float2 test_max_float2(float2 p0, float2 p1) { return max(p0, p1); } -// CHECK-LABEL: define noundef <3 x float> @_Z15test_max_float3 -// CHECK: call <3 x float> @llvm.maxnum.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_max_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.maxnum.v3f32 float3 test_max_float3(float3 p0, float3 p1) { return max(p0, p1); } -// CHECK-LABEL: define noundef <4 x float> @_Z15test_max_float4 -// CHECK: call <4 x float> @llvm.maxnum.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_max_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.maxnum.v4f32 float4 test_max_float4(float4 p0, float4 p1) { return max(p0, p1); } -// CHECK-LABEL: define noundef double @_Z15test_max_double -// CHECK: call double @llvm.maxnum.f64( +// CHECK-LABEL: define noundef nofpclass(nan inf) double @_Z15test_max_double +// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.maxnum.f64( double test_max_double(double p0, double p1) { return max(p0, p1); } -// CHECK-LABEL: define noundef <2 x double> @_Z16test_max_double2 -// CHECK: call <2 x double> @llvm.maxnum.v2f64 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x double> @_Z16test_max_double2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.maxnum.v2f64 double2 test_max_double2(double2 p0, double2 p1) { return max(p0, p1); } -// CHECK-LABEL: define noundef <3 x double> @_Z16test_max_double3 -// CHECK: call <3 x double> @llvm.maxnum.v3f64 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x double> @_Z16test_max_double3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.maxnum.v3f64 double3 test_max_double3(double3 p0, double3 p1) { return max(p0, p1); } -// CHECK-LABEL: define noundef <4 x double> @_Z16test_max_double4 -// CHECK: call <4 x double> @llvm.maxnum.v4f64 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x double> @_Z16test_max_double4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.maxnum.v4f64 double4 test_max_double4(double4 p0, double4 p1) { return max(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/min.hlsl b/clang/test/CodeGenHLSL/builtins/min.hlsl index 02d20d13f916d..a352c72f29542 100644 --- a/clang/test/CodeGenHLSL/builtins/min.hlsl +++ b/clang/test/CodeGenHLSL/builtins/min.hlsl @@ -85,49 +85,49 @@ uint64_t3 test_min_ulong3(uint64_t3 p0, uint64_t3 p1) { return min(p0, p1); } // CHECK: call <4 x i64> @llvm.umin.v4i64 uint64_t4 test_min_ulong4(uint64_t4 p0, uint64_t4 p1) { return min(p0, p1); } -// NATIVE_HALF-LABEL: define noundef half @_Z13test_min_half -// NATIVE_HALF: call half @llvm.minnum.f16( -// NO_HALF-LABEL: define noundef float @_Z13test_min_half -// NO_HALF: call float @llvm.minnum.f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_min_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.minnum.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_min_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.minnum.f32( half test_min_half(half p0, half p1) { return min(p0, p1); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_min_half2 -// NATIVE_HALF: call <2 x half> @llvm.minnum.v2f16 -// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_min_half2 -// NO_HALF: call <2 x float> @llvm.minnum.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_min_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.minnum.v2f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_min_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.minnum.v2f32( half2 test_min_half2(half2 p0, half2 p1) { return min(p0, p1); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_min_half3 -// NATIVE_HALF: call <3 x half> @llvm.minnum.v3f16 -// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_min_half3 -// NO_HALF: call <3 x float> @llvm.minnum.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_min_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.minnum.v3f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_min_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.minnum.v3f32( half3 test_min_half3(half3 p0, half3 p1) { return min(p0, p1); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_min_half4 -// NATIVE_HALF: call <4 x half> @llvm.minnum.v4f16 -// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_min_half4 -// NO_HALF: call <4 x float> @llvm.minnum.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_min_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.minnum.v4f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_min_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.minnum.v4f32( half4 test_min_half4(half4 p0, half4 p1) { return min(p0, p1); } -// CHECK-LABEL: define noundef float @_Z14test_min_float -// CHECK: call float @llvm.minnum.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_min_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.minnum.f32( float test_min_float(float p0, float p1) { return min(p0, p1); } -// CHECK-LABEL: define noundef <2 x float> @_Z15test_min_float2 -// CHECK: call <2 x float> @llvm.minnum.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_min_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.minnum.v2f32 float2 test_min_float2(float2 p0, float2 p1) { return min(p0, p1); } -// CHECK-LABEL: define noundef <3 x float> @_Z15test_min_float3 -// CHECK: call <3 x float> @llvm.minnum.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_min_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.minnum.v3f32 float3 test_min_float3(float3 p0, float3 p1) { return min(p0, p1); } -// CHECK-LABEL: define noundef <4 x float> @_Z15test_min_float4 -// CHECK: call <4 x float> @llvm.minnum.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_min_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.minnum.v4f32 float4 test_min_float4(float4 p0, float4 p1) { return min(p0, p1); } -// CHECK-LABEL: define noundef double @_Z15test_min_double -// CHECK: call double @llvm.minnum.f64( +// CHECK-LABEL: define noundef nofpclass(nan inf) double @_Z15test_min_double +// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.minnum.f64( double test_min_double(double p0, double p1) { return min(p0, p1); } -// CHECK-LABEL: define noundef <2 x double> @_Z16test_min_double2 -// CHECK: call <2 x double> @llvm.minnum.v2f64 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x double> @_Z16test_min_double2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.minnum.v2f64 double2 test_min_double2(double2 p0, double2 p1) { return min(p0, p1); } -// CHECK-LABEL: define noundef <3 x double> @_Z16test_min_double3 -// CHECK: call <3 x double> @llvm.minnum.v3f64 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x double> @_Z16test_min_double3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.minnum.v3f64 double3 test_min_double3(double3 p0, double3 p1) { return min(p0, p1); } -// CHECK-LABEL: define noundef <4 x double> @_Z16test_min_double4 -// CHECK: call <4 x double> @llvm.minnum.v4f64 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x double> @_Z16test_min_double4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.minnum.v4f64 double4 test_min_double4(double4 p0, double4 p1) { return min(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/normalize.hlsl b/clang/test/CodeGenHLSL/builtins/normalize.hlsl index 83ad607c14a60..830fc26b7acf0 100644 --- a/clang/test/CodeGenHLSL/builtins/normalize.hlsl +++ b/clang/test/CodeGenHLSL/builtins/normalize.hlsl @@ -2,24 +2,24 @@ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // NATIVE_HALF: define [[FNATTRS]] half @ -// NATIVE_HALF: call half @llvm.[[TARGET]].normalize.f16(half -// NO_HALF: call float @llvm.[[TARGET]].normalize.f32(float +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].normalize.f16(half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].normalize.f32(float // NATIVE_HALF: ret half // NO_HALF: ret float half test_normalize_half(half p0) @@ -27,8 +27,8 @@ half test_normalize_half(half p0) return normalize(p0); } // NATIVE_HALF: define [[FNATTRS]] <2 x half> @ -// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].normalize.v2f16(<2 x half> -// NO_HALF: call <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float> +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].normalize.v2f16(<2 x half> +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float> // NATIVE_HALF: ret <2 x half> %hlsl.normalize // NO_HALF: ret <2 x float> %hlsl.normalize half2 test_normalize_half2(half2 p0) @@ -36,8 +36,8 @@ half2 test_normalize_half2(half2 p0) return normalize(p0); } // NATIVE_HALF: define [[FNATTRS]] <3 x half> @ -// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].normalize.v3f16(<3 x half> -// NO_HALF: call <3 x float> @llvm.[[TARGET]].normalize.v3f32(<3 x float> +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].normalize.v3f16(<3 x half> +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].normalize.v3f32(<3 x float> // NATIVE_HALF: ret <3 x half> %hlsl.normalize // NO_HALF: ret <3 x float> %hlsl.normalize half3 test_normalize_half3(half3 p0) @@ -45,8 +45,8 @@ half3 test_normalize_half3(half3 p0) return normalize(p0); } // NATIVE_HALF: define [[FNATTRS]] <4 x half> @ -// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].normalize.v4f16(<4 x half> -// NO_HALF: call <4 x float> @llvm.[[TARGET]].normalize.v4f32(<4 x float> +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].normalize.v4f16(<4 x half> +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].normalize.v4f32(<4 x float> // NATIVE_HALF: ret <4 x half> %hlsl.normalize // NO_HALF: ret <4 x float> %hlsl.normalize half4 test_normalize_half4(half4 p0) @@ -55,14 +55,14 @@ half4 test_normalize_half4(half4 p0) } // CHECK: define [[FNATTRS]] float @ -// CHECK: call float @llvm.[[TARGET]].normalize.f32(float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].normalize.f32(float // CHECK: ret float float test_normalize_float(float p0) { return normalize(p0); } // CHECK: define [[FNATTRS]] <2 x float> @ -// CHECK: %hlsl.normalize = call <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float> +// CHECK: %hlsl.normalize = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float> // CHECK: ret <2 x float> %hlsl.normalize float2 test_normalize_float2(float2 p0) @@ -70,14 +70,14 @@ float2 test_normalize_float2(float2 p0) return normalize(p0); } // CHECK: define [[FNATTRS]] <3 x float> @ -// CHECK: %hlsl.normalize = call <3 x float> @llvm.[[TARGET]].normalize.v3f32( +// CHECK: %hlsl.normalize = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].normalize.v3f32( // CHECK: ret <3 x float> %hlsl.normalize float3 test_normalize_float3(float3 p0) { return normalize(p0); } // CHECK: define [[FNATTRS]] <4 x float> @ -// CHECK: %hlsl.normalize = call <4 x float> @llvm.[[TARGET]].normalize.v4f32( +// CHECK: %hlsl.normalize = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].normalize.v4f32( // CHECK: ret <4 x float> %hlsl.normalize float4 test_length_float4(float4 p0) { diff --git a/clang/test/CodeGenHLSL/builtins/pow.hlsl b/clang/test/CodeGenHLSL/builtins/pow.hlsl index 4e18480763343..fd21f1b94c57e 100644 --- a/clang/test/CodeGenHLSL/builtins/pow.hlsl +++ b/clang/test/CodeGenHLSL/builtins/pow.hlsl @@ -5,36 +5,36 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z13test_pow_half -// NATIVE_HALF: call half @llvm.pow.f16( -// NO_HALF-LABEL: define noundef float @_Z13test_pow_half -// NO_HALF: call float @llvm.pow.f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_pow_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.pow.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_pow_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.pow.f32( half test_pow_half(half p0, half p1) { return pow(p0, p1); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_pow_half2 -// NATIVE_HALF: call <2 x half> @llvm.pow.v2f16 -// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_pow_half2 -// NO_HALF: call <2 x float> @llvm.pow.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_pow_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.pow.v2f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_pow_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.pow.v2f32( half2 test_pow_half2(half2 p0, half2 p1) { return pow(p0, p1); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_pow_half3 -// NATIVE_HALF: call <3 x half> @llvm.pow.v3f16 -// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_pow_half3 -// NO_HALF: call <3 x float> @llvm.pow.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_pow_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.pow.v3f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_pow_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.pow.v3f32( half3 test_pow_half3(half3 p0, half3 p1) { return pow(p0, p1); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_pow_half4 -// NATIVE_HALF: call <4 x half> @llvm.pow.v4f16 -// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_pow_half4 -// NO_HALF: call <4 x float> @llvm.pow.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_pow_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.pow.v4f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_pow_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.pow.v4f32( half4 test_pow_half4(half4 p0, half4 p1) { return pow(p0, p1); } -// CHECK-LABEL: define noundef float @_Z14test_pow_float -// CHECK: call float @llvm.pow.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_pow_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.pow.f32( float test_pow_float(float p0, float p1) { return pow(p0, p1); } -// CHECK-LABEL: define noundef <2 x float> @_Z15test_pow_float2 -// CHECK: call <2 x float> @llvm.pow.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_pow_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.pow.v2f32 float2 test_pow_float2(float2 p0, float2 p1) { return pow(p0, p1); } -// CHECK-LABEL: define noundef <3 x float> @_Z15test_pow_float3 -// CHECK: call <3 x float> @llvm.pow.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_pow_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.pow.v3f32 float3 test_pow_float3(float3 p0, float3 p1) { return pow(p0, p1); } -// CHECK-LABEL: define noundef <4 x float> @_Z15test_pow_float4 -// CHECK: call <4 x float> @llvm.pow.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_pow_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.pow.v4f32 float4 test_pow_float4(float4 p0, float4 p1) { return pow(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/radians.hlsl b/clang/test/CodeGenHLSL/builtins/radians.hlsl index 774300525dbf0..b2b6190ea90f6 100644 --- a/clang/test/CodeGenHLSL/builtins/radians.hlsl +++ b/clang/test/CodeGenHLSL/builtins/radians.hlsl @@ -2,65 +2,65 @@ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DTARGET=dx -DFNATTRS=noundef +// RUN: -DTARGET=dx -DFNATTRS="noundef nofpclass(nan inf)" // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DTARGET=dx -DFNATTRS=noundef +// RUN: -DTARGET=dx -DFNATTRS="noundef nofpclass(nan inf)" // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef" +// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef nofpclass(nan inf)" // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef" +// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef nofpclass(nan inf)" // NATIVE_HALF: define [[FNATTRS]] half @ -// NATIVE_HALF: %{{.*}} = call half @llvm.[[TARGET]].radians.f16( +// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].radians.f16( // NATIVE_HALF: ret half %{{.*}} // NO_HALF: define [[FNATTRS]] float @ -// NO_HALF: %{{.*}} = call float @llvm.[[TARGET]].radians.f32( +// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].radians.f32( // NO_HALF: ret float %{{.*}} half test_radians_half(half p0) { return radians(p0); } // NATIVE_HALF: define [[FNATTRS]] <2 x half> @ -// NATIVE_HALF: %{{.*}} = call <2 x half> @llvm.[[TARGET]].radians.v2f16 +// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].radians.v2f16 // NATIVE_HALF: ret <2 x half> %{{.*}} // NO_HALF: define [[FNATTRS]] <2 x float> @ -// NO_HALF: %{{.*}} = call <2 x float> @llvm.[[TARGET]].radians.v2f32( +// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].radians.v2f32( // NO_HALF: ret <2 x float> %{{.*}} half2 test_radians_half2(half2 p0) { return radians(p0); } // NATIVE_HALF: define [[FNATTRS]] <3 x half> @ -// NATIVE_HALF: %{{.*}} = call <3 x half> @llvm.[[TARGET]].radians.v3f16 +// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].radians.v3f16 // NATIVE_HALF: ret <3 x half> %{{.*}} // NO_HALF: define [[FNATTRS]] <3 x float> @ -// NO_HALF: %{{.*}} = call <3 x float> @llvm.[[TARGET]].radians.v3f32( +// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].radians.v3f32( // NO_HALF: ret <3 x float> %{{.*}} half3 test_radians_half3(half3 p0) { return radians(p0); } // NATIVE_HALF: define [[FNATTRS]] <4 x half> @ -// NATIVE_HALF: %{{.*}} = call <4 x half> @llvm.[[TARGET]].radians.v4f16 +// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].radians.v4f16 // NATIVE_HALF: ret <4 x half> %{{.*}} // NO_HALF: define [[FNATTRS]] <4 x float> @ -// NO_HALF: %{{.*}} = call <4 x float> @llvm.[[TARGET]].radians.v4f32( +// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].radians.v4f32( // NO_HALF: ret <4 x float> %{{.*}} half4 test_radians_half4(half4 p0) { return radians(p0); } // CHECK: define [[FNATTRS]] float @ -// CHECK: %{{.*}} = call float @llvm.[[TARGET]].radians.f32( +// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].radians.f32( // CHECK: ret float %{{.*}} float test_radians_float(float p0) { return radians(p0); } // CHECK: define [[FNATTRS]] <2 x float> @ -// CHECK: %{{.*}} = call <2 x float> @llvm.[[TARGET]].radians.v2f32 +// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].radians.v2f32 // CHECK: ret <2 x float> %{{.*}} float2 test_radians_float2(float2 p0) { return radians(p0); } // CHECK: define [[FNATTRS]] <3 x float> @ -// CHECK: %{{.*}} = call <3 x float> @llvm.[[TARGET]].radians.v3f32 +// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].radians.v3f32 // CHECK: ret <3 x float> %{{.*}} float3 test_radians_float3(float3 p0) { return radians(p0); } // CHECK: define [[FNATTRS]] <4 x float> @ -// CHECK: %{{.*}} = call <4 x float> @llvm.[[TARGET]].radians.v4f32 +// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].radians.v4f32 // CHECK: ret <4 x float> %{{.*}} float4 test_radians_float4(float4 p0) { return radians(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/rcp.hlsl b/clang/test/CodeGenHLSL/builtins/rcp.hlsl index 83fe33406c7c8..8f07f3a031531 100644 --- a/clang/test/CodeGenHLSL/builtins/rcp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/rcp.hlsl @@ -13,90 +13,90 @@ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_NO_HALF,SPIR_CHECK -// DXIL_NATIVE_HALF: define noundef half @ -// SPIR_NATIVE_HALF: define spir_func noundef half @ -// NATIVE_HALF: %hlsl.rcp = fdiv half 0xH3C00, %{{.*}} +// DXIL_NATIVE_HALF: define noundef nofpclass(nan inf) half @ +// SPIR_NATIVE_HALF: define spir_func noundef nofpclass(nan inf) half @ +// NATIVE_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn half 0xH3C00, %{{.*}} // NATIVE_HALF: ret half %hlsl.rcp -// DXIL_NO_HALF: define noundef float @ -// SPIR_NO_HALF: define spir_func noundef float @ -// NO_HALF: %hlsl.rcp = fdiv float 1.000000e+00, %{{.*}} +// DXIL_NO_HALF: define noundef nofpclass(nan inf) float @ +// SPIR_NO_HALF: define spir_func noundef nofpclass(nan inf) float @ +// NO_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn float 1.000000e+00, %{{.*}} // NO_HALF: ret float %hlsl.rcp half test_rcp_half(half p0) { return rcp(p0); } -// DXIL_NATIVE_HALF: define noundef <2 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @ -// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> splat (half 0xH3C00), %{{.*}} +// DXIL_NATIVE_HALF: define noundef nofpclass(nan inf) <2 x half> @ +// SPIR_NATIVE_HALF: define spir_func noundef nofpclass(nan inf) <2 x half> @ +// NATIVE_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <2 x half> splat (half 0xH3C00), %{{.*}} // NATIVE_HALF: ret <2 x half> %hlsl.rcp -// DXIL_NO_HALF: define noundef <2 x float> @ -// SPIR_NO_HALF: define spir_func noundef <2 x float> @ -// NO_HALF: %hlsl.rcp = fdiv <2 x float> splat (float 1.000000e+00), %{{.*}} +// DXIL_NO_HALF: define noundef nofpclass(nan inf) <2 x float> @ +// SPIR_NO_HALF: define spir_func noundef nofpclass(nan inf) <2 x float> @ +// NO_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <2 x float> splat (float 1.000000e+00), %{{.*}} // NO_HALF: ret <2 x float> %hlsl.rcp half2 test_rcp_half2(half2 p0) { return rcp(p0); } -// DXIL_NATIVE_HALF: define noundef <3 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @ -// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> splat (half 0xH3C00), %{{.*}} +// DXIL_NATIVE_HALF: define noundef nofpclass(nan inf) <3 x half> @ +// SPIR_NATIVE_HALF: define spir_func noundef nofpclass(nan inf) <3 x half> @ +// NATIVE_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <3 x half> splat (half 0xH3C00), %{{.*}} // NATIVE_HALF: ret <3 x half> %hlsl.rcp -// DXIL_NO_HALF: define noundef <3 x float> @ -// SPIR_NO_HALF: define spir_func noundef <3 x float> @ -// NO_HALF: %hlsl.rcp = fdiv <3 x float> splat (float 1.000000e+00), %{{.*}} +// DXIL_NO_HALF: define noundef nofpclass(nan inf) <3 x float> @ +// SPIR_NO_HALF: define spir_func noundef nofpclass(nan inf) <3 x float> @ +// NO_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <3 x float> splat (float 1.000000e+00), %{{.*}} // NO_HALF: ret <3 x float> %hlsl.rcp half3 test_rcp_half3(half3 p0) { return rcp(p0); } -// DXIL_NATIVE_HALF: define noundef <4 x half> @ -// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @ -// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> splat (half 0xH3C00), %{{.*}} +// DXIL_NATIVE_HALF: define noundef nofpclass(nan inf) <4 x half> @ +// SPIR_NATIVE_HALF: define spir_func noundef nofpclass(nan inf) <4 x half> @ +// NATIVE_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <4 x half> splat (half 0xH3C00), %{{.*}} // NATIVE_HALF: ret <4 x half> %hlsl.rcp -// DXIL_NO_HALF: define noundef <4 x float> @ -// SPIR_NO_HALF: define spir_func noundef <4 x float> @ -// NO_HALF: %hlsl.rcp = fdiv <4 x float> splat (float 1.000000e+00), %{{.*}} +// DXIL_NO_HALF: define noundef nofpclass(nan inf) <4 x float> @ +// SPIR_NO_HALF: define spir_func noundef nofpclass(nan inf) <4 x float> @ +// NO_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <4 x float> splat (float 1.000000e+00), %{{.*}} // NO_HALF: ret <4 x float> %hlsl.rcp half4 test_rcp_half4(half4 p0) { return rcp(p0); } -// DXIL_CHECK: define noundef float @ -// SPIR_CHECK: define spir_func noundef float @ -// CHECK: %hlsl.rcp = fdiv float 1.000000e+00, %{{.*}} +// DXIL_CHECK: define noundef nofpclass(nan inf) float @ +// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) float @ +// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn float 1.000000e+00, %{{.*}} // CHECK: ret float %hlsl.rcp float test_rcp_float(float p0) { return rcp(p0); } -// DXIL_CHECK: define noundef <2 x float> @ -// SPIR_CHECK: define spir_func noundef <2 x float> @ -// CHECK: %hlsl.rcp = fdiv <2 x float> splat (float 1.000000e+00), %{{.*}} +// DXIL_CHECK: define noundef nofpclass(nan inf) <2 x float> @ +// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <2 x float> @ +// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <2 x float> splat (float 1.000000e+00), %{{.*}} // CHECK: ret <2 x float> %hlsl.rcp float2 test_rcp_float2(float2 p0) { return rcp(p0); } -// DXIL_CHECK: define noundef <3 x float> @ -// SPIR_CHECK: define spir_func noundef <3 x float> @ -// CHECK: %hlsl.rcp = fdiv <3 x float> splat (float 1.000000e+00), %{{.*}} +// DXIL_CHECK: define noundef nofpclass(nan inf) <3 x float> @ +// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <3 x float> @ +// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <3 x float> splat (float 1.000000e+00), %{{.*}} // CHECK: ret <3 x float> %hlsl.rcp float3 test_rcp_float3(float3 p0) { return rcp(p0); } -// DXIL_CHECK: define noundef <4 x float> @ -// SPIR_CHECK: define spir_func noundef <4 x float> @ -// CHECK: %hlsl.rcp = fdiv <4 x float> splat (float 1.000000e+00), %{{.*}} +// DXIL_CHECK: define noundef nofpclass(nan inf) <4 x float> @ +// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <4 x float> @ +// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <4 x float> splat (float 1.000000e+00), %{{.*}} // CHECK: ret <4 x float> %hlsl.rcp float4 test_rcp_float4(float4 p0) { return rcp(p0); } -// DXIL_CHECK: define noundef double @ -// SPIR_CHECK: define spir_func noundef double @ -// CHECK: %hlsl.rcp = fdiv double 1.000000e+00, %{{.*}} +// DXIL_CHECK: define noundef nofpclass(nan inf) double @ +// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) double @ +// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn double 1.000000e+00, %{{.*}} // CHECK: ret double %hlsl.rcp double test_rcp_double(double p0) { return rcp(p0); } -// DXIL_CHECK: define noundef <2 x double> @ -// SPIR_CHECK: define spir_func noundef <2 x double> @ -// CHECK: %hlsl.rcp = fdiv <2 x double> splat (double 1.000000e+00), %{{.*}} +// DXIL_CHECK: define noundef nofpclass(nan inf) <2 x double> @ +// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <2 x double> @ +// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <2 x double> splat (double 1.000000e+00), %{{.*}} // CHECK: ret <2 x double> %hlsl.rcp double2 test_rcp_double2(double2 p0) { return rcp(p0); } -// DXIL_CHECK: define noundef <3 x double> @ -// SPIR_CHECK: define spir_func noundef <3 x double> @ -// CHECK: %hlsl.rcp = fdiv <3 x double> splat (double 1.000000e+00), %{{.*}} +// DXIL_CHECK: define noundef nofpclass(nan inf) <3 x double> @ +// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <3 x double> @ +// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <3 x double> splat (double 1.000000e+00), %{{.*}} // CHECK: ret <3 x double> %hlsl.rcp double3 test_rcp_double3(double3 p0) { return rcp(p0); } -// DXIL_CHECK: define noundef <4 x double> @ -// SPIR_CHECK: define spir_func noundef <4 x double> @ -// CHECK: %hlsl.rcp = fdiv <4 x double> splat (double 1.000000e+00), %{{.*}} +// DXIL_CHECK: define noundef nofpclass(nan inf) <4 x double> @ +// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <4 x double> @ +// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <4 x double> splat (double 1.000000e+00), %{{.*}} // CHECK: ret <4 x double> %hlsl.rcp double4 test_rcp_double4(double4 p0) { return rcp(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/round.hlsl b/clang/test/CodeGenHLSL/builtins/round.hlsl index 6da63a394a8fd..a945a9677abbb 100644 --- a/clang/test/CodeGenHLSL/builtins/round.hlsl +++ b/clang/test/CodeGenHLSL/builtins/round.hlsl @@ -5,48 +5,48 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z15test_round_half -// NATIVE_HALF: %elt.roundeven = call half @llvm.roundeven.f16( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z15test_round_half +// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn half @llvm.roundeven.f16( // NATIVE_HALF: ret half %elt.roundeven -// NO_HALF-LABEL: define noundef float @_Z15test_round_half -// NO_HALF: %elt.roundeven = call float @llvm.roundeven.f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z15test_round_half +// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( // NO_HALF: ret float %elt.roundeven half test_round_half(half p0) { return round(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z16test_round_half2 -// NATIVE_HALF: %elt.roundeven = call <2 x half> @llvm.roundeven.v2f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z16test_round_half2 +// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.roundeven.v2f16 // NATIVE_HALF: ret <2 x half> %elt.roundeven -// NO_HALF-LABEL: define noundef <2 x float> @_Z16test_round_half2 -// NO_HALF: %elt.roundeven = call <2 x float> @llvm.roundeven.v2f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_round_half2 +// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32( // NO_HALF: ret <2 x float> %elt.roundeven half2 test_round_half2(half2 p0) { return round(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z16test_round_half3 -// NATIVE_HALF: %elt.roundeven = call <3 x half> @llvm.roundeven.v3f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z16test_round_half3 +// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.roundeven.v3f16 // NATIVE_HALF: ret <3 x half> %elt.roundeven -// NO_HALF-LABEL: define noundef <3 x float> @_Z16test_round_half3 -// NO_HALF: %elt.roundeven = call <3 x float> @llvm.roundeven.v3f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_round_half3 +// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32( // NO_HALF: ret <3 x float> %elt.roundeven half3 test_round_half3(half3 p0) { return round(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z16test_round_half4 -// NATIVE_HALF: %elt.roundeven = call <4 x half> @llvm.roundeven.v4f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z16test_round_half4 +// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.roundeven.v4f16 // NATIVE_HALF: ret <4 x half> %elt.roundeven -// NO_HALF-LABEL: define noundef <4 x float> @_Z16test_round_half4 -// NO_HALF: %elt.roundeven = call <4 x float> @llvm.roundeven.v4f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_round_half4 +// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32( // NO_HALF: ret <4 x float> %elt.roundeven half4 test_round_half4(half4 p0) { return round(p0); } -// CHECK-LABEL: define noundef float @_Z16test_round_float -// CHECK: %elt.roundeven = call float @llvm.roundeven.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z16test_round_float +// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32( // CHECK: ret float %elt.roundeven float test_round_float(float p0) { return round(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z17test_round_float2 -// CHECK: %elt.roundeven = call <2 x float> @llvm.roundeven.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z17test_round_float2 +// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32 // CHECK: ret <2 x float> %elt.roundeven float2 test_round_float2(float2 p0) { return round(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z17test_round_float3 -// CHECK: %elt.roundeven = call <3 x float> @llvm.roundeven.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z17test_round_float3 +// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32 // CHECK: ret <3 x float> %elt.roundeven float3 test_round_float3(float3 p0) { return round(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z17test_round_float4 -// CHECK: %elt.roundeven = call <4 x float> @llvm.roundeven.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z17test_round_float4 +// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32 // CHECK: ret <4 x float> %elt.roundeven float4 test_round_float4(float4 p0) { return round(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl b/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl index b1b53fc187da6..6c9b1f643713b 100644 --- a/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl +++ b/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl @@ -2,63 +2,63 @@ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // NATIVE_HALF: define [[FNATTRS]] half @ -// NATIVE_HALF: %hlsl.rsqrt = call half @llvm.[[TARGET]].rsqrt.f16( +// NATIVE_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].rsqrt.f16( // NATIVE_HALF: ret half %hlsl.rsqrt // NO_HALF: define [[FNATTRS]] float @ -// NO_HALF: %hlsl.rsqrt = call float @llvm.[[TARGET]].rsqrt.f32( +// NO_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].rsqrt.f32( // NO_HALF: ret float %hlsl.rsqrt half test_rsqrt_half(half p0) { return rsqrt(p0); } // NATIVE_HALF: define [[FNATTRS]] <2 x half> @ -// NATIVE_HALF: %hlsl.rsqrt = call <2 x half> @llvm.[[TARGET]].rsqrt.v2f16 +// NATIVE_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].rsqrt.v2f16 // NATIVE_HALF: ret <2 x half> %hlsl.rsqrt // NO_HALF: define [[FNATTRS]] <2 x float> @ -// NO_HALF: %hlsl.rsqrt = call <2 x float> @llvm.[[TARGET]].rsqrt.v2f32( +// NO_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].rsqrt.v2f32( // NO_HALF: ret <2 x float> %hlsl.rsqrt half2 test_rsqrt_half2(half2 p0) { return rsqrt(p0); } // NATIVE_HALF: define [[FNATTRS]] <3 x half> @ -// NATIVE_HALF: %hlsl.rsqrt = call <3 x half> @llvm.[[TARGET]].rsqrt.v3f16 +// NATIVE_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].rsqrt.v3f16 // NATIVE_HALF: ret <3 x half> %hlsl.rsqrt // NO_HALF: define [[FNATTRS]] <3 x float> @ -// NO_HALF: %hlsl.rsqrt = call <3 x float> @llvm.[[TARGET]].rsqrt.v3f32( +// NO_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].rsqrt.v3f32( // NO_HALF: ret <3 x float> %hlsl.rsqrt half3 test_rsqrt_half3(half3 p0) { return rsqrt(p0); } // NATIVE_HALF: define [[FNATTRS]] <4 x half> @ -// NATIVE_HALF: %hlsl.rsqrt = call <4 x half> @llvm.[[TARGET]].rsqrt.v4f16 +// NATIVE_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].rsqrt.v4f16 // NATIVE_HALF: ret <4 x half> %hlsl.rsqrt // NO_HALF: define [[FNATTRS]] <4 x float> @ -// NO_HALF: %hlsl.rsqrt = call <4 x float> @llvm.[[TARGET]].rsqrt.v4f32( +// NO_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].rsqrt.v4f32( // NO_HALF: ret <4 x float> %hlsl.rsqrt half4 test_rsqrt_half4(half4 p0) { return rsqrt(p0); } // CHECK: define [[FNATTRS]] float @ -// CHECK: %hlsl.rsqrt = call float @llvm.[[TARGET]].rsqrt.f32( +// CHECK: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].rsqrt.f32( // CHECK: ret float %hlsl.rsqrt float test_rsqrt_float(float p0) { return rsqrt(p0); } // CHECK: define [[FNATTRS]] <2 x float> @ -// CHECK: %hlsl.rsqrt = call <2 x float> @llvm.[[TARGET]].rsqrt.v2f32 +// CHECK: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].rsqrt.v2f32 // CHECK: ret <2 x float> %hlsl.rsqrt float2 test_rsqrt_float2(float2 p0) { return rsqrt(p0); } // CHECK: define [[FNATTRS]] <3 x float> @ -// CHECK: %hlsl.rsqrt = call <3 x float> @llvm.[[TARGET]].rsqrt.v3f32 +// CHECK: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].rsqrt.v3f32 // CHECK: ret <3 x float> %hlsl.rsqrt float3 test_rsqrt_float3(float3 p0) { return rsqrt(p0); } // CHECK: define [[FNATTRS]] <4 x float> @ -// CHECK: %hlsl.rsqrt = call <4 x float> @llvm.[[TARGET]].rsqrt.v4f32 +// CHECK: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].rsqrt.v4f32 // CHECK: ret <4 x float> %hlsl.rsqrt float4 test_rsqrt_float4(float4 p0) { return rsqrt(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/saturate.hlsl b/clang/test/CodeGenHLSL/builtins/saturate.hlsl index c221f6e0f2c36..3304073d9b501 100644 --- a/clang/test/CodeGenHLSL/builtins/saturate.hlsl +++ b/clang/test/CodeGenHLSL/builtins/saturate.hlsl @@ -13,48 +13,48 @@ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -Dtar=spv // NATIVE_HALF-LABEL: define{{.*}} half @_Z18test_saturate_halfDh -// NATIVE_HALF: call half @llvm.[[tar]].saturate.f16( +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.[[tar]].saturate.f16( // NO_HALF-LABEL: define{{.*}} float @_Z18test_saturate_halfDh -// NO_HALF: call float @llvm.[[tar]].saturate.f32( +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.[[tar]].saturate.f32( half test_saturate_half(half p0) { return saturate(p0); } // NATIVE_HALF-LABEL: define{{.*}} <2 x half> @_Z19test_saturate_half2Dv2_Dh -// NATIVE_HALF: call <2 x half> @llvm.[[tar]].saturate.v2f16 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[tar]].saturate.v2f16 // NO_HALF-LABEL: define{{.*}} <2 x float> @_Z19test_saturate_half2Dv2_Dh -// NO_HALF: call <2 x float> @llvm.[[tar]].saturate.v2f32( +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[tar]].saturate.v2f32( half2 test_saturate_half2(half2 p0) { return saturate(p0); } // NATIVE_HALF-LABEL: define{{.*}} <3 x half> @_Z19test_saturate_half3Dv3_Dh( -// NATIVE_HALF: call <3 x half> @llvm.[[tar]].saturate.v3f16 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[tar]].saturate.v3f16 // NO_HALF-LABEL: define{{.*}} <3 x float> @_Z19test_saturate_half3Dv3_Dh(<3 x float> -// NO_HALF: call <3 x float> @llvm.[[tar]].saturate.v3f32( +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[tar]].saturate.v3f32( half3 test_saturate_half3(half3 p0) { return saturate(p0); } // NATIVE_HALF-LABEL: define{{.*}} <4 x half> @_Z19test_saturate_half4Dv4_Dh( -// NATIVE_HALF: call <4 x half> @llvm.[[tar]].saturate.v4f16 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[tar]].saturate.v4f16 // NO_HALF-LABEL: define{{.*}} <4 x float> @_Z19test_saturate_half4Dv4_Dh(<4 x float> -// NO_HALF: call <4 x float> @llvm.[[tar]].saturate.v4f32( +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[tar]].saturate.v4f32( half4 test_saturate_half4(half4 p0) { return saturate(p0); } // CHECK-LABEL: define{{.*}} float @_Z19test_saturate_floatf( -// CHECK: call float @llvm.[[tar]].saturate.f32( +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.[[tar]].saturate.f32( float test_saturate_float(float p0) { return saturate(p0); } // CHECK-LABEL: define{{.*}} <2 x float> @_Z20test_saturate_float2Dv2_f(<2 x float> -// CHECK: call <2 x float> @llvm.[[tar]].saturate.v2f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[tar]].saturate.v2f32 float2 test_saturate_float2(float2 p0) { return saturate(p0); } // CHECK-LABEL: define{{.*}} <3 x float> @_Z20test_saturate_float3Dv3_f(<3 x float> -// CHECK: call <3 x float> @llvm.[[tar]].saturate.v3f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[tar]].saturate.v3f32 float3 test_saturate_float3(float3 p0) { return saturate(p0); } // CHECK-LABEL: define{{.*}} <4 x float> @_Z20test_saturate_float4Dv4_f(<4 x float> -// CHECK: call <4 x float> @llvm.[[tar]].saturate.v4f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[tar]].saturate.v4f32 float4 test_saturate_float4(float4 p0) { return saturate(p0); } // CHECK-LABEL: define{{.*}} double @_Z20test_saturate_doubled(double -// CHECK: call double @llvm.[[tar]].saturate.f64( +// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.[[tar]].saturate.f64( double test_saturate_double(double p0) { return saturate(p0); } // CHECK-LABEL: define{{.*}} <2 x double> @_Z21test_saturate_double2Dv2_d(<2 x double> -// CHECK: call <2 x double> @llvm.[[tar]].saturate.v2f64 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.[[tar]].saturate.v2f64 double2 test_saturate_double2(double2 p0) { return saturate(p0); } // CHECK-LABEL: define{{.*}} <3 x double> @_Z21test_saturate_double3Dv3_d(<3 x double> -// CHECK: call <3 x double> @llvm.[[tar]].saturate.v3f64 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.[[tar]].saturate.v3f64 double3 test_saturate_double3(double3 p0) { return saturate(p0); } // CHECK-LABEL: define{{.*}} <4 x double> @_Z21test_saturate_double4Dv4_d(<4 x double> -// CHECK: call <4 x double> @llvm.[[tar]].saturate.v4f64 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[tar]].saturate.v4f64 double4 test_saturate_double4(double4 p0) { return saturate(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/sin.hlsl b/clang/test/CodeGenHLSL/builtins/sin.hlsl index 9f7fa5043bdc7..69c657239ef95 100644 --- a/clang/test/CodeGenHLSL/builtins/sin.hlsl +++ b/clang/test/CodeGenHLSL/builtins/sin.hlsl @@ -5,36 +5,36 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z13test_sin_half -// NATIVE_HALF: call half @llvm.sin.f16( -// NO_HALF-LABEL: define noundef float @_Z13test_sin_half -// NO_HALF: call float @llvm.sin.f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_sin_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.sin.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_sin_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.sin.f32( half test_sin_half(half p0) { return sin(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_sin_half2 -// NATIVE_HALF: call <2 x half> @llvm.sin.v2f16 -// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_sin_half2 -// NO_HALF: call <2 x float> @llvm.sin.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_sin_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.sin.v2f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_sin_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sin.v2f32( half2 test_sin_half2(half2 p0) { return sin(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_sin_half3 -// NATIVE_HALF: call <3 x half> @llvm.sin.v3f16 -// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_sin_half3 -// NO_HALF: call <3 x float> @llvm.sin.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_sin_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.sin.v3f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_sin_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sin.v3f32( half3 test_sin_half3(half3 p0) { return sin(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_sin_half4 -// NATIVE_HALF: call <4 x half> @llvm.sin.v4f16 -// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_sin_half4 -// NO_HALF: call <4 x float> @llvm.sin.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_sin_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.sin.v4f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_sin_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sin.v4f32( half4 test_sin_half4(half4 p0) { return sin(p0); } -// CHECK-LABEL: define noundef float @_Z14test_sin_float -// CHECK: call float @llvm.sin.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_sin_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.sin.f32( float test_sin_float(float p0) { return sin(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z15test_sin_float2 -// CHECK: call <2 x float> @llvm.sin.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_sin_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sin.v2f32 float2 test_sin_float2(float2 p0) { return sin(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z15test_sin_float3 -// CHECK: call <3 x float> @llvm.sin.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_sin_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sin.v3f32 float3 test_sin_float3(float3 p0) { return sin(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z15test_sin_float4 -// CHECK: call <4 x float> @llvm.sin.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_sin_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sin.v4f32 float4 test_sin_float4(float4 p0) { return sin(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/sinh.hlsl b/clang/test/CodeGenHLSL/builtins/sinh.hlsl index 167f6f2242235..d55d60515418c 100644 --- a/clang/test/CodeGenHLSL/builtins/sinh.hlsl +++ b/clang/test/CodeGenHLSL/builtins/sinh.hlsl @@ -7,53 +7,53 @@ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF // CHECK-LABEL: test_sinh_half -// NATIVE_HALF: call half @llvm.sinh.f16 -// NO_HALF: call float @llvm.sinh.f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.sinh.f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.sinh.f32 half test_sinh_half ( half p0 ) { return sinh ( p0 ); } // CHECK-LABEL: test_sinh_half2 -// NATIVE_HALF: call <2 x half> @llvm.sinh.v2f16 -// NO_HALF: call <2 x float> @llvm.sinh.v2f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.sinh.v2f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sinh.v2f32 half2 test_sinh_half2 ( half2 p0 ) { return sinh ( p0 ); } // CHECK-LABEL: test_sinh_half3 -// NATIVE_HALF: call <3 x half> @llvm.sinh.v3f16 -// NO_HALF: call <3 x float> @llvm.sinh.v3f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.sinh.v3f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sinh.v3f32 half3 test_sinh_half3 ( half3 p0 ) { return sinh ( p0 ); } // CHECK-LABEL: test_sinh_half4 -// NATIVE_HALF: call <4 x half> @llvm.sinh.v4f16 -// NO_HALF: call <4 x float> @llvm.sinh.v4f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.sinh.v4f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sinh.v4f32 half4 test_sinh_half4 ( half4 p0 ) { return sinh ( p0 ); } // CHECK-LABEL: test_sinh_float -// CHECK: call float @llvm.sinh.f32 +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.sinh.f32 float test_sinh_float ( float p0 ) { return sinh ( p0 ); } // CHECK-LABEL: test_sinh_float2 -// CHECK: call <2 x float> @llvm.sinh.v2f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sinh.v2f32 float2 test_sinh_float2 ( float2 p0 ) { return sinh ( p0 ); } // CHECK-LABEL: test_sinh_float3 -// CHECK: call <3 x float> @llvm.sinh.v3f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sinh.v3f32 float3 test_sinh_float3 ( float3 p0 ) { return sinh ( p0 ); } // CHECK-LABEL: test_sinh_float4 -// CHECK: call <4 x float> @llvm.sinh.v4f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sinh.v4f32 float4 test_sinh_float4 ( float4 p0 ) { return sinh ( p0 ); } diff --git a/clang/test/CodeGenHLSL/builtins/sqrt.hlsl b/clang/test/CodeGenHLSL/builtins/sqrt.hlsl index 63454cea3fe6f..94d966f0bef8a 100644 --- a/clang/test/CodeGenHLSL/builtins/sqrt.hlsl +++ b/clang/test/CodeGenHLSL/builtins/sqrt.hlsl @@ -5,48 +5,48 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z14test_sqrt_half -// NATIVE_HALF: %{{.*}} = call half @llvm.sqrt.f16( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z14test_sqrt_half +// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn half @llvm.sqrt.f16( // NATIVE_HALF: ret half %{{.*}} -// NO_HALF-LABEL: define noundef float @_Z14test_sqrt_half -// NO_HALF: %{{.*}} = call float @llvm.sqrt.f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z14test_sqrt_half +// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn float @llvm.sqrt.f32( // NO_HALF: ret float %{{.*}} half test_sqrt_half(half p0) { return sqrt(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z15test_sqrt_half2 -// NATIVE_HALF: %{{.*}} = call <2 x half> @llvm.sqrt.v2f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z15test_sqrt_half2 +// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.sqrt.v2f16 // NATIVE_HALF: ret <2 x half> %{{.*}} -// NO_HALF-LABEL: define noundef <2 x float> @_Z15test_sqrt_half2 -// NO_HALF: %{{.*}} = call <2 x float> @llvm.sqrt.v2f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_sqrt_half2 +// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sqrt.v2f32( // NO_HALF: ret <2 x float> %{{.*}} half2 test_sqrt_half2(half2 p0) { return sqrt(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z15test_sqrt_half3 -// NATIVE_HALF: %{{.*}} = call <3 x half> @llvm.sqrt.v3f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z15test_sqrt_half3 +// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.sqrt.v3f16 // NATIVE_HALF: ret <3 x half> %{{.*}} -// NO_HALF-LABEL: define noundef <3 x float> @_Z15test_sqrt_half3 -// NO_HALF: %{{.*}} = call <3 x float> @llvm.sqrt.v3f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_sqrt_half3 +// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sqrt.v3f32( // NO_HALF: ret <3 x float> %{{.*}} half3 test_sqrt_half3(half3 p0) { return sqrt(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z15test_sqrt_half4 -// NATIVE_HALF: %{{.*}} = call <4 x half> @llvm.sqrt.v4f16 +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z15test_sqrt_half4 +// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.sqrt.v4f16 // NATIVE_HALF: ret <4 x half> %{{.*}} -// NO_HALF-LABEL: define noundef <4 x float> @_Z15test_sqrt_half4 -// NO_HALF: %{{.*}} = call <4 x float> @llvm.sqrt.v4f32( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_sqrt_half4 +// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sqrt.v4f32( // NO_HALF: ret <4 x float> %{{.*}} half4 test_sqrt_half4(half4 p0) { return sqrt(p0); } -// CHECK-LABEL: define noundef float @_Z15test_sqrt_float -// CHECK: %{{.*}} = call float @llvm.sqrt.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z15test_sqrt_float +// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn float @llvm.sqrt.f32( // CHECK: ret float %{{.*}} float test_sqrt_float(float p0) { return sqrt(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z16test_sqrt_float2 -// CHECK: %{{.*}} = call <2 x float> @llvm.sqrt.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_sqrt_float2 +// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sqrt.v2f32 // CHECK: ret <2 x float> %{{.*}} float2 test_sqrt_float2(float2 p0) { return sqrt(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z16test_sqrt_float3 -// CHECK: %{{.*}} = call <3 x float> @llvm.sqrt.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_sqrt_float3 +// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sqrt.v3f32 // CHECK: ret <3 x float> %{{.*}} float3 test_sqrt_float3(float3 p0) { return sqrt(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z16test_sqrt_float4 -// CHECK: %{{.*}} = call <4 x float> @llvm.sqrt.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_sqrt_float4 +// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sqrt.v4f32 // CHECK: ret <4 x float> %{{.*}} float4 test_sqrt_float4(float4 p0) { return sqrt(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/step.hlsl b/clang/test/CodeGenHLSL/builtins/step.hlsl index 442f4930ca579..49d09e5c6fe6f 100644 --- a/clang/test/CodeGenHLSL/builtins/step.hlsl +++ b/clang/test/CodeGenHLSL/builtins/step.hlsl @@ -2,24 +2,24 @@ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS=noundef -DTARGET=dx +// RUN: -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ // RUN: --check-prefixes=CHECK,NATIVE_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \ -// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv +// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv // NATIVE_HALF: define [[FNATTRS]] half @ -// NATIVE_HALF: call half @llvm.[[TARGET]].step.f16(half -// NO_HALF: call float @llvm.[[TARGET]].step.f32(float +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].step.f16(half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].step.f32(float // NATIVE_HALF: ret half // NO_HALF: ret float half test_step_half(half p0, half p1) @@ -27,8 +27,8 @@ half test_step_half(half p0, half p1) return step(p0, p1); } // NATIVE_HALF: define [[FNATTRS]] <2 x half> @ -// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].step.v2f16(<2 x half> -// NO_HALF: call <2 x float> @llvm.[[TARGET]].step.v2f32(<2 x float> +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].step.v2f16(<2 x half> +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].step.v2f32(<2 x float> // NATIVE_HALF: ret <2 x half> %hlsl.step // NO_HALF: ret <2 x float> %hlsl.step half2 test_step_half2(half2 p0, half2 p1) @@ -36,8 +36,8 @@ half2 test_step_half2(half2 p0, half2 p1) return step(p0, p1); } // NATIVE_HALF: define [[FNATTRS]] <3 x half> @ -// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].step.v3f16(<3 x half> -// NO_HALF: call <3 x float> @llvm.[[TARGET]].step.v3f32(<3 x float> +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].step.v3f16(<3 x half> +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].step.v3f32(<3 x float> // NATIVE_HALF: ret <3 x half> %hlsl.step // NO_HALF: ret <3 x float> %hlsl.step half3 test_step_half3(half3 p0, half3 p1) @@ -45,8 +45,8 @@ half3 test_step_half3(half3 p0, half3 p1) return step(p0, p1); } // NATIVE_HALF: define [[FNATTRS]] <4 x half> @ -// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].step.v4f16(<4 x half> -// NO_HALF: call <4 x float> @llvm.[[TARGET]].step.v4f32(<4 x float> +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].step.v4f16(<4 x half> +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].step.v4f32(<4 x float> // NATIVE_HALF: ret <4 x half> %hlsl.step // NO_HALF: ret <4 x float> %hlsl.step half4 test_step_half4(half4 p0, half4 p1) @@ -55,28 +55,28 @@ half4 test_step_half4(half4 p0, half4 p1) } // CHECK: define [[FNATTRS]] float @ -// CHECK: call float @llvm.[[TARGET]].step.f32(float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].step.f32(float // CHECK: ret float float test_step_float(float p0, float p1) { return step(p0, p1); } // CHECK: define [[FNATTRS]] <2 x float> @ -// CHECK: %hlsl.step = call <2 x float> @llvm.[[TARGET]].step.v2f32( +// CHECK: %hlsl.step = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].step.v2f32( // CHECK: ret <2 x float> %hlsl.step float2 test_step_float2(float2 p0, float2 p1) { return step(p0, p1); } // CHECK: define [[FNATTRS]] <3 x float> @ -// CHECK: %hlsl.step = call <3 x float> @llvm.[[TARGET]].step.v3f32( +// CHECK: %hlsl.step = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].step.v3f32( // CHECK: ret <3 x float> %hlsl.step float3 test_step_float3(float3 p0, float3 p1) { return step(p0, p1); } // CHECK: define [[FNATTRS]] <4 x float> @ -// CHECK: %hlsl.step = call <4 x float> @llvm.[[TARGET]].step.v4f32( +// CHECK: %hlsl.step = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].step.v4f32( // CHECK: ret <4 x float> %hlsl.step float4 test_step_float4(float4 p0, float4 p1) { diff --git a/clang/test/CodeGenHLSL/builtins/tan.hlsl b/clang/test/CodeGenHLSL/builtins/tan.hlsl index aa542fac226d0..c8c948624a613 100644 --- a/clang/test/CodeGenHLSL/builtins/tan.hlsl +++ b/clang/test/CodeGenHLSL/builtins/tan.hlsl @@ -7,53 +7,53 @@ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF // CHECK-LABEL: test_tan_half -// NATIVE_HALF: call half @llvm.tan.f16 -// NO_HALF: call float @llvm.tan.f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.tan.f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.tan.f32 half test_tan_half ( half p0 ) { return tan ( p0 ); } // CHECK-LABEL: test_tan_half2 -// NATIVE_HALF: call <2 x half> @llvm.tan.v2f16 -// NO_HALF: call <2 x float> @llvm.tan.v2f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.tan.v2f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.tan.v2f32 half2 test_tan_half2 ( half2 p0 ) { return tan ( p0 ); } // CHECK-LABEL: test_tan_half3 -// NATIVE_HALF: call <3 x half> @llvm.tan.v3f16 -// NO_HALF: call <3 x float> @llvm.tan.v3f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.tan.v3f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.tan.v3f32 half3 test_tan_half3 ( half3 p0 ) { return tan ( p0 ); } // CHECK-LABEL: test_tan_half4 -// NATIVE_HALF: call <4 x half> @llvm.tan.v4f16 -// NO_HALF: call <4 x float> @llvm.tan.v4f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.tan.v4f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.tan.v4f32 half4 test_tan_half4 ( half4 p0 ) { return tan ( p0 ); } // CHECK-LABEL: test_tan_float -// CHECK: call float @llvm.tan.f32 +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.tan.f32 float test_tan_float ( float p0 ) { return tan ( p0 ); } // CHECK-LABEL: test_tan_float2 -// CHECK: call <2 x float> @llvm.tan.v2f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.tan.v2f32 float2 test_tan_float2 ( float2 p0 ) { return tan ( p0 ); } // CHECK-LABEL: test_tan_float3 -// CHECK: call <3 x float> @llvm.tan.v3f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.tan.v3f32 float3 test_tan_float3 ( float3 p0 ) { return tan ( p0 ); } // CHECK-LABEL: test_tan_float4 -// CHECK: call <4 x float> @llvm.tan.v4f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.tan.v4f32 float4 test_tan_float4 ( float4 p0 ) { return tan ( p0 ); } diff --git a/clang/test/CodeGenHLSL/builtins/tanh.hlsl b/clang/test/CodeGenHLSL/builtins/tanh.hlsl index 6d09c8b819f91..f947c7f53b110 100644 --- a/clang/test/CodeGenHLSL/builtins/tanh.hlsl +++ b/clang/test/CodeGenHLSL/builtins/tanh.hlsl @@ -7,53 +7,53 @@ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF // CHECK-LABEL: test_tanh_half -// NATIVE_HALF: call half @llvm.tanh.f16 -// NO_HALF: call float @llvm.tanh.f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.tanh.f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.tanh.f32 half test_tanh_half ( half p0 ) { return tanh ( p0 ); } // CHECK-LABEL: test_tanh_half2 -// NATIVE_HALF: call <2 x half> @llvm.tanh.v2f16 -// NO_HALF: call <2 x float> @llvm.tanh.v2f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.tanh.v2f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.tanh.v2f32 half2 test_tanh_half2 ( half2 p0 ) { return tanh ( p0 ); } // CHECK-LABEL: test_tanh_half3 -// NATIVE_HALF: call <3 x half> @llvm.tanh.v3f16 -// NO_HALF: call <3 x float> @llvm.tanh.v3f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.tanh.v3f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.tanh.v3f32 half3 test_tanh_half3 ( half3 p0 ) { return tanh ( p0 ); } // CHECK-LABEL: test_tanh_half4 -// NATIVE_HALF: call <4 x half> @llvm.tanh.v4f16 -// NO_HALF: call <4 x float> @llvm.tanh.v4f32 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.tanh.v4f16 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.tanh.v4f32 half4 test_tanh_half4 ( half4 p0 ) { return tanh ( p0 ); } // CHECK-LABEL: test_tanh_float -// CHECK: call float @llvm.tanh.f32 +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.tanh.f32 float test_tanh_float ( float p0 ) { return tanh ( p0 ); } // CHECK-LABEL: test_tanh_float2 -// CHECK: call <2 x float> @llvm.tanh.v2f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.tanh.v2f32 float2 test_tanh_float2 ( float2 p0 ) { return tanh ( p0 ); } // CHECK-LABEL: test_tanh_float3 -// CHECK: call <3 x float> @llvm.tanh.v3f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.tanh.v3f32 float3 test_tanh_float3 ( float3 p0 ) { return tanh ( p0 ); } // CHECK-LABEL: test_tanh_float4 -// CHECK: call <4 x float> @llvm.tanh.v4f32 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.tanh.v4f32 float4 test_tanh_float4 ( float4 p0 ) { return tanh ( p0 ); } diff --git a/clang/test/CodeGenHLSL/builtins/trunc.hlsl b/clang/test/CodeGenHLSL/builtins/trunc.hlsl index 3da12c88aa7fe..26de5bf94c3cc 100644 --- a/clang/test/CodeGenHLSL/builtins/trunc.hlsl +++ b/clang/test/CodeGenHLSL/builtins/trunc.hlsl @@ -5,42 +5,42 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | \ // RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF -// NATIVE_HALF-LABEL: define noundef half @_Z15test_trunc_half -// NATIVE_HALF: call half @llvm.trunc.f16( -// NO_HALF-LABEL: define noundef float @_Z15test_trunc_half -// NO_HALF: call float @llvm.trunc.f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z15test_trunc_half +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.trunc.f16( +// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z15test_trunc_half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.trunc.f32( half test_trunc_half(half p0) { return trunc(p0); } -// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z16test_trunc_half2 -// NATIVE_HALF: call <2 x half> @llvm.trunc.v2f16 -// NO_HALF-LABEL: define noundef <2 x float> @_Z16test_trunc_half2 -// NO_HALF: call <2 x float> @llvm.trunc.v2f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z16test_trunc_half2 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.trunc.v2f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_trunc_half2 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.trunc.v2f32( half2 test_trunc_half2(half2 p0) { return trunc(p0); } -// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z16test_trunc_half3 -// NATIVE_HALF: call <3 x half> @llvm.trunc.v3f16 -// NO_HALF-LABEL: define noundef <3 x float> @_Z16test_trunc_half3 -// NO_HALF: call <3 x float> @llvm.trunc.v3f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z16test_trunc_half3 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.trunc.v3f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_trunc_half3 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.trunc.v3f32( half3 test_trunc_half3(half3 p0) { return trunc(p0); } -// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z16test_trunc_half4 -// NATIVE_HALF: call <4 x half> @llvm.trunc.v4f16 -// NO_HALF-LABEL: define noundef <4 x float> @_Z16test_trunc_half4 -// NO_HALF: call <4 x float> @llvm.trunc.v4f32( +// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z16test_trunc_half4 +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.trunc.v4f16 +// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_trunc_half4 +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.trunc.v4f32( half4 test_trunc_half4(half4 p0) { return trunc(p0); } -// CHECK-LABEL: define noundef float @_Z16test_trunc_float -// CHECK: call float @llvm.trunc.f32( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z16test_trunc_float +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.trunc.f32( float test_trunc_float(float p0) { return trunc(p0); } -// CHECK-LABEL: define noundef <2 x float> @_Z17test_trunc_float2 -// CHECK: call <2 x float> @llvm.trunc.v2f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z17test_trunc_float2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.trunc.v2f32 float2 test_trunc_float2(float2 p0) { return trunc(p0); } -// CHECK-LABEL: define noundef <3 x float> @_Z17test_trunc_float3 -// CHECK: call <3 x float> @llvm.trunc.v3f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z17test_trunc_float3 +// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.trunc.v3f32 float3 test_trunc_float3(float3 p0) { return trunc(p0); } -// CHECK-LABEL: define noundef <4 x float> @_Z17test_trunc_float4 -// CHECK: call <4 x float> @llvm.trunc.v4f32 +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z17test_trunc_float4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.trunc.v4f32 float4 test_trunc_float4(float4 p0) { return trunc(p0); } diff --git a/clang/test/CodeGenHLSL/float3.hlsl b/clang/test/CodeGenHLSL/float3.hlsl index 767720b049152..4f03464586bf0 100644 --- a/clang/test/CodeGenHLSL/float3.hlsl +++ b/clang/test/CodeGenHLSL/float3.hlsl @@ -3,7 +3,7 @@ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s // Make sure float3 is not changed into float4. -// CHECK:<3 x float> @_Z3fooDv3_f(<3 x float> noundef %[[PARAM:[0-9a-zA-Z]+]]) +// CHECK:<3 x float> @_Z3fooDv3_f(<3 x float> noundef nofpclass(nan inf) %[[PARAM:[0-9a-zA-Z]+]]) // CHECK:%[[A_ADDR:.+]] = alloca <3 x float>, align 16 // CHECK-NEXT:store <3 x float> %[[PARAM]], ptr %[[A_ADDR]], align 16 // CHECK-NEXT:%[[V:[0-9]+]] = load <3 x float>, ptr %[[A_ADDR]], align 16 diff --git a/clang/test/CodeGenHLSL/this-reference.hlsl b/clang/test/CodeGenHLSL/this-reference.hlsl index 66b79d4250012..71a33f4a271a2 100644 --- a/clang/test/CodeGenHLSL/this-reference.hlsl +++ b/clang/test/CodeGenHLSL/this-reference.hlsl @@ -24,7 +24,7 @@ void main() { // CHECK: %call = call noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %Vals) // CHECK-NEXT: %First = getelementptr inbounds nuw %struct.Pair, ptr %Vals, i32 0, i32 0 // CHECK-NEXT: store i32 %call, ptr %First, align 4 - // CHECK-NEXT: %call1 = call noundef float @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %Vals) + // CHECK-NEXT: %call1 = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) float @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %Vals) // CHECK-NEXT: %Second = getelementptr inbounds nuw %struct.Pair, ptr %Vals, i32 0, i32 1 // CHECK: [[Pair:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Pair" From 17912f336bd3d3db09b367c155fb44148ea4de24 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 9 Jan 2025 16:05:17 +0000 Subject: [PATCH 03/79] LAA: refactor dependence class to prep for scaled strides (NFC) (#122113) Rearrange the DepDistanceAndSizeInfo struct in preparation to scale strides. getDependenceDistanceStrideAndSize now returns the data of CommonStride, MaxStride, and clarifies when to retry with runtime checks, in place of (unscaled) strides. --- .../llvm/Analysis/LoopAccessAnalysis.h | 22 ++++++++--- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 37 +++++++++++++------ 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index a35bc7402d1a8..31374a128856c 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -366,16 +366,26 @@ class MemoryDepChecker { struct DepDistanceStrideAndSizeInfo { const SCEV *Dist; - uint64_t StrideA; - uint64_t StrideB; + + /// Strides could either be scaled (in bytes, taking the size of the + /// underlying type into account), or unscaled (in indexing units; unscaled + /// stride = scaled stride / size of underlying type). Here, strides are + /// unscaled. + uint64_t MaxStride; + std::optional CommonStride; + + bool ShouldRetryWithRuntimeCheck; uint64_t TypeByteSize; bool AIsWrite; bool BIsWrite; - DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA, - uint64_t StrideB, uint64_t TypeByteSize, - bool AIsWrite, bool BIsWrite) - : Dist(Dist), StrideA(StrideA), StrideB(StrideB), + DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride, + std::optional CommonStride, + bool ShouldRetryWithRuntimeCheck, + uint64_t TypeByteSize, bool AIsWrite, + bool BIsWrite) + : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride), + ShouldRetryWithRuntimeCheck(ShouldRetryWithRuntimeCheck), TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} }; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 2c75d5625cb66..38e9145826c08 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1994,8 +1994,23 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy); if (!HasSameSize) TypeByteSize = 0; - return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt), - std::abs(StrideBPtrInt), TypeByteSize, + + StrideAPtrInt = std::abs(StrideAPtrInt); + StrideBPtrInt = std::abs(StrideBPtrInt); + + uint64_t MaxStride = std::max(StrideAPtrInt, StrideBPtrInt); + + std::optional CommonStride; + if (StrideAPtrInt == StrideBPtrInt) + CommonStride = StrideAPtrInt; + + // TODO: Historically, we don't retry with runtime checks unless the + // (unscaled) strides are the same. Fix this once the condition for runtime + // checks in isDependent is fixed. + bool ShouldRetryWithRuntimeCheck = CommonStride.has_value(); + + return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride, + ShouldRetryWithRuntimeCheck, TypeByteSize, AIsWrite, BIsWrite); } @@ -2011,23 +2026,21 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (std::holds_alternative(Res)) return std::get(Res); - auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] = + auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck, + TypeByteSize, AIsWrite, BIsWrite] = std::get(Res); bool HasSameSize = TypeByteSize > 0; - std::optional CommonStride = - StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt; if (isa(Dist)) { - // TODO: Relax requirement that there is a common stride to retry with - // non-constant distance dependencies. - FoundNonConstantDistanceDependence |= CommonStride.has_value(); + // TODO: Relax requirement that there is a common unscaled stride to retry + // with non-constant distance dependencies. + FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n"); return Dependence::Unknown; } ScalarEvolution &SE = *PSE.getSE(); auto &DL = InnermostLoop->getHeader()->getDataLayout(); - uint64_t MaxStride = std::max(StrideA, StrideB); // If the distance between the acecsses is larger than their maximum absolute // stride multiplied by the symbolic maximum backedge taken count (which is an @@ -2086,7 +2099,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // condition to consider retrying with runtime checks. Historically, we // did not set it when strides were different but there is no inherent // reason to. - FoundNonConstantDistanceDependence |= CommonStride.has_value(); + FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; return Dependence::Unknown; } if (!HasSameSize || @@ -2105,7 +2118,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue(); // Below we only handle strictly positive distances. if (MinDistance <= 0) { - FoundNonConstantDistanceDependence |= CommonStride.has_value(); + FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; return Dependence::Unknown; } @@ -2118,7 +2131,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // condition to consider retrying with runtime checks. Historically, we // did not set it when strides were different but there is no inherent // reason to. - FoundNonConstantDistanceDependence |= CommonStride.has_value(); + FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; } if (!HasSameSize) { From fb03ce7aadd997486e2709051290756e09ad3d01 Mon Sep 17 00:00:00 2001 From: LoS Date: Thu, 9 Jan 2025 17:22:46 +0100 Subject: [PATCH 04/79] [libc++] Fix test for vector data_const.pass.cpp (#122085) The test contained some non-const accesses when we're really trying to test that the method is const. --- .../sequences/vector/vector.data/data_const.pass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp index 885caf272afbf..0cf0b22047352 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp @@ -39,7 +39,7 @@ TEST_CONSTEXPR_CXX20 bool tests() assert(is_contiguous_container_asan_correct(v)); } { - std::vector v(100); + const std::vector v(100); assert(v.data() == std::addressof(v.front())); assert(is_contiguous_container_asan_correct(v)); } @@ -55,7 +55,7 @@ TEST_CONSTEXPR_CXX20 bool tests() assert(is_contiguous_container_asan_correct(v)); } { - std::vector> v(100); + const std::vector> v(100); assert(v.data() == std::addressof(v.front())); assert(is_contiguous_container_asan_correct(v)); } @@ -70,7 +70,7 @@ TEST_CONSTEXPR_CXX20 bool tests() assert(is_contiguous_container_asan_correct(v)); } { - std::vector> v(100); + const std::vector> v(100); assert(v.data() == std::addressof(v.front())); assert(is_contiguous_container_asan_correct(v)); } From d056c756aea3fc709cf7d6bc8acabe9a8c7218db Mon Sep 17 00:00:00 2001 From: Alexander Belyaev <32522095+pifon2a@users.noreply.github.com> Date: Thu, 9 Jan 2025 17:31:17 +0100 Subject: [PATCH 05/79] [mlir][scf] Fix unrolling when the yielded value is defined above the loop. (#122177) --- mlir/lib/Dialect/SCF/Utils/Utils.cpp | 2 +- mlir/test/Dialect/SCF/loop-unroll.mlir | 29 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index 6cda7100fe073..fa82bcb816a2a 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -361,7 +361,7 @@ static void generateUnrolledLoop( // Update yielded values. for (unsigned i = 0, e = lastYielded.size(); i < e; i++) - lastYielded[i] = operandMap.lookup(yieldedValues[i]); + lastYielded[i] = operandMap.lookupOrDefault(yieldedValues[i]); } // Make sure we annotate the Ops in the original body. We do this last so that diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir index 68a11fb6a72c6..0368505a1b70d 100644 --- a/mlir/test/Dialect/SCF/loop-unroll.mlir +++ b/mlir/test/Dialect/SCF/loop-unroll.mlir @@ -489,3 +489,32 @@ func.func @static_loop_unroll_with_integer_iv() -> (f32, f32) { // UNROLL-BY-3-NEXT: scf.yield %[[EADD]], %[[EMUL]] : f32, f32 // UNROLL-BY-3-NEXT: } // UNROLL-BY-3-NEXT: return %[[EFOR]]#0, %[[EFOR]]#1 : f32, f32 + +// ----- + +// Test loop unrolling when the yielded value is defined above the loop. +func.func @loop_unroll_static_yield_value_defined_above(%init: i32) { + %c42 = arith.constant 42 : i32 + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + %103:2 = scf.for %i = %c0 to %c4 step %c1 + iter_args(%iter1 = %c42, %iter2 = %init) -> (i32, i32) { + %0 = arith.andi %iter2, %iter1 : i32 + scf.yield %0, %init : i32, i32 + } + return +} +// UNROLL-OUTER-BY-2-LABEL: @loop_unroll_static_yield_value_defined_above( +// UNROLL-OUTER-BY-2-SAME: %[[INIT:.*]]: i32) { +// UNROLL-OUTER-BY-2-DAG: %[[C42:.*]] = arith.constant 42 : i32 +// UNROLL-OUTER-BY-2-DAG: %[[C0:.*]] = arith.constant 0 : index +// UNROLL-OUTER-BY-2-DAG: %[[C4:.*]] = arith.constant 4 : index +// UNROLL-OUTER-BY-2-DAG: %[[C2:.*]] = arith.constant 2 : index +// UNROLL-OUTER-BY-2: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C2]] +// UNROLL-OUTER-BY-2-SAME: iter_args(%[[ITER1:.*]] = %[[C42]], +// UNROLL-OUTER-BY-2-SAME: %[[ITER2:.*]] = %[[INIT]]) +// UNROLL-OUTER-BY-2: %[[SUM:.*]] = arith.andi %[[ITER2]], %[[ITER1]] +// UNROLL-OUTER-BY-2: %[[SUM1:.*]] = arith.andi %[[INIT]], %[[SUM]] +// UNROLL-OUTER-BY-2: scf.yield %[[SUM1]], %[[INIT]] : i32, i32 + From 5e6c74d086848d8525b2bdd3b3e717c64b576193 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Fri, 10 Jan 2025 01:37:57 +0900 Subject: [PATCH 06/79] [clang] Fix SPIR-V OpenMP test assembler check (#122310) Testing is failing in HEAD, Integration issue with the SPIR-V OpenMP change and the SPIR-V change adding the assembler job. I notice there is a consistency issue in the binding name "SPIRV" vs "SPIR-V", I will fix that in a separate commit so we unbreak build ASAP. Signed-off-by: Sarnie, Nick --- clang/test/Driver/spirv-openmp-toolchain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/spirv-openmp-toolchain.c b/clang/test/Driver/spirv-openmp-toolchain.c index 3a94d978c2d70..3d585d78e86c2 100644 --- a/clang/test/Driver/spirv-openmp-toolchain.c +++ b/clang/test/Driver/spirv-openmp-toolchain.c @@ -45,7 +45,7 @@ // CHECK-BINDINGS-TEMPS: "spirv64-intel" - "clang", inputs: ["[[INPUT]]"], output: "[[DEVICE_PP:.+]]" // CHECK-BINDINGS-TEMPS: "spirv64-intel" - "clang", inputs: ["[[DEVICE_PP]]", "[[HOST_BC]]"], output: "[[DEVICE_TEMP_BC:.+]]" // CHECK-BINDINGS-TEMPS: "spirv64-intel" - "SPIR-V::Translator", inputs: ["[[DEVICE_TEMP_BC]]"], output: "[[DEVICE_ASM:.+]]" -// CHECK-BINDINGS-TEMPS: "spirv64-intel" - "SPIR-V::Translator", inputs: ["[[DEVICE_ASM]]"], output: "[[DEVICE_SPV:.+]]" +// CHECK-BINDINGS-TEMPS: "spirv64-intel" - "SPIRV::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "[[DEVICE_SPV:.+]]" // CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_SPV]]"], output: "[[DEVICE_IMAGE:.+]]" // CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_IMAGE]]"], output: "[[HOST_ASM:.+]]" // CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "clang::as", inputs: ["[[HOST_ASM]]"], output: "[[HOST_OBJ:.+]]" From 473510abf5c6a2f9d74a0c19f0f5d8b483596e05 Mon Sep 17 00:00:00 2001 From: "Sv. Lockal" Date: Fri, 10 Jan 2025 00:40:36 +0800 Subject: [PATCH 07/79] [libc++] Fix mi-mode in GDB pretty printers (#120951) GDB/MI requires unique names for each child, otherwise fails with "Duplicate variable object name". Also wrapped containers printers were flattened for cleaner visualization in IDEs and CLI. Fixes #62340 --- .../libcxx/gdb/gdb_pretty_printer_test.py | 56 +++++++++++++---- .../libcxx/gdb/gdb_pretty_printer_test.sh.cpp | 63 +++++++++++++------ libcxx/utils/gdb/libcxx/printers.py | 41 +++++++----- 3 files changed, 112 insertions(+), 48 deletions(-) diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py index 07154d001d3a1..254a61a8c633e 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py @@ -15,6 +15,7 @@ """ from __future__ import print_function +import json import re import gdb import sys @@ -29,6 +30,7 @@ # we exit. has_run_tests = False +has_execute_mi = tuple(map(int, gdb.VERSION.split("."))) >= (14, 2) class CheckResult(gdb.Command): def __init__(self): @@ -42,36 +44,43 @@ def invoke(self, arg, from_tty): # Stack frame is: # 0. StopForDebugger - # 1. ComparePrettyPrintToChars or ComparePrettyPrintToRegex + # 1. CompareListChildrenToChars, ComparePrettyPrintToChars or ComparePrettyPrintToRegex # 2. TestCase compare_frame = gdb.newest_frame().older() testcase_frame = compare_frame.older() test_loc = testcase_frame.find_sal() + test_loc_str = test_loc.symtab.filename + ":" + str(test_loc.line) # Use interactive commands in the correct context to get the pretty # printed version - value_str = self._get_value_string(compare_frame, testcase_frame) + frame_name = compare_frame.name() + if frame_name.startswith("CompareListChildren"): + if has_execute_mi: + value = self._get_children(compare_frame) + else: + print("SKIPPED: " + test_loc_str) + return + else: + value = self._get_value(compare_frame, testcase_frame) - # Ignore the convenience variable name and newline - value = value_str[value_str.find("= ") + 2 : -1] gdb.newest_frame().select() expectation_val = compare_frame.read_var("expectation") check_literal = expectation_val.string(encoding="utf-8") - if "PrettyPrintToRegex" in compare_frame.name(): + if "PrettyPrintToRegex" in frame_name: test_fails = not re.search(check_literal, value) else: test_fails = value != check_literal if test_fails: global test_failures - print("FAIL: " + test_loc.symtab.filename + ":" + str(test_loc.line)) + print("FAIL: " + test_loc_str) print("GDB printed:") print(" " + repr(value)) print("Value should match:") print(" " + repr(check_literal)) test_failures += 1 else: - print("PASS: " + test_loc.symtab.filename + ":" + str(test_loc.line)) + print("PASS: " + test_loc_str) except RuntimeError as e: # At this point, lots of different things could be wrong, so don't try to @@ -81,9 +90,28 @@ def invoke(self, arg, from_tty): print(str(e)) test_failures += 1 - def _get_value_string(self, compare_frame, testcase_frame): + def _get_children(self, compare_frame): + compare_frame.select() + gdb.execute_mi("-var-create", "value", "*", "value") + r = gdb.execute_mi("-var-list-children", "--simple-values", "value") + gdb.execute_mi("-var-delete", "value") + children = r["children"] + if r["displayhint"] == "map": + r = [ + { + "key": json.loads(children[2 * i]["value"]), + "value": json.loads(children[2 * i + 1]["value"]), + } + for i in range(len(children) // 2) + ] + else: + r = [json.loads(el["value"]) for el in children] + return json.dumps(r, sort_keys=True) + + def _get_value(self, compare_frame, testcase_frame): compare_frame.select() - if "ComparePrettyPrint" in compare_frame.name(): + frame_name = compare_frame.name() + if frame_name.startswith("ComparePrettyPrint"): s = gdb.execute("p value", to_string=True) else: value_str = str(compare_frame.read_var("value")) @@ -91,8 +119,10 @@ def _get_value_string(self, compare_frame, testcase_frame): testcase_frame.select() s = gdb.execute("p " + clean_expression_str, to_string=True) if sys.version_info.major == 2: - return s.decode("utf-8") - return s + s = s.decode("utf-8") + + # Ignore the convenience variable name and newline + return s[s.find("= ") + 2 : -1] def exit_handler(event=None): @@ -112,6 +142,10 @@ def exit_handler(event=None): # Disable terminal paging gdb.execute("set height 0") gdb.execute("set python print-stack full") + +if has_execute_mi: + gdb.execute_mi("-enable-pretty-printing") + test_failures = 0 CheckResult() test_bp = gdb.Breakpoint("StopForDebugger") diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp index 9d4b7039402a4..ff951d94db0a4 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp @@ -129,6 +129,12 @@ void CompareExpressionPrettyPrintToRegex( StopForDebugger(&value, &expectation); } +template +void CompareListChildrenToChars(TypeToPrint value, const char* expectation) { + MarkAsLive(value); + StopForDebugger(&value, &expectation); +} + namespace example { struct example_struct { int a = 0; @@ -361,28 +367,28 @@ void multimap_test() { void queue_test() { std::queue i_am_empty; - ComparePrettyPrintToChars(i_am_empty, - "std::queue wrapping = {std::deque is empty}"); + ComparePrettyPrintToChars(i_am_empty, "std::queue wrapping: std::deque is empty"); std::queue one_two_three(std::deque{1, 2, 3}); - ComparePrettyPrintToChars(one_two_three, - "std::queue wrapping = {" - "std::deque with 3 elements = {1, 2, 3}}"); + ComparePrettyPrintToChars( + one_two_three, + "std::queue wrapping: " + "std::deque with 3 elements = {1, 2, 3}"); } void priority_queue_test() { std::priority_queue i_am_empty; - ComparePrettyPrintToChars(i_am_empty, - "std::priority_queue wrapping = {std::vector of length 0, capacity 0}"); + ComparePrettyPrintToChars(i_am_empty, "std::priority_queue wrapping: std::vector of length 0, capacity 0"); std::priority_queue one_two_three; one_two_three.push(11111); one_two_three.push(22222); one_two_three.push(33333); - ComparePrettyPrintToRegex(one_two_three, - R"(std::priority_queue wrapping = )" - R"({std::vector of length 3, capacity 3 = {33333)"); + ComparePrettyPrintToRegex( + one_two_three, + R"(std::priority_queue wrapping: )" + R"(std::vector of length 3, capacity 3 = {33333)"); ComparePrettyPrintToRegex(one_two_three, ".*11111.*"); ComparePrettyPrintToRegex(one_two_three, ".*22222.*"); @@ -410,25 +416,22 @@ void set_test() { void stack_test() { std::stack test0; - ComparePrettyPrintToChars(test0, - "std::stack wrapping = {std::deque is empty}"); + ComparePrettyPrintToChars(test0, "std::stack wrapping: std::deque is empty"); test0.push(5); test0.push(6); - ComparePrettyPrintToChars( - test0, "std::stack wrapping = {std::deque with 2 elements = {5, 6}}"); + ComparePrettyPrintToChars(test0, "std::stack wrapping: std::deque with 2 elements = {5, 6}"); std::stack test1; test1.push(true); test1.push(false); - ComparePrettyPrintToChars( - test1, - "std::stack wrapping = {std::deque with 2 elements = {true, false}}"); + ComparePrettyPrintToChars(test1, "std::stack wrapping: std::deque with 2 elements = {true, false}"); std::stack test2; test2.push("Hello"); test2.push("World"); - ComparePrettyPrintToChars(test2, - "std::stack wrapping = {std::deque with 2 elements " - "= {\"Hello\", \"World\"}}"); + ComparePrettyPrintToChars( + test2, + "std::stack wrapping: std::deque with 2 elements " + "= {\"Hello\", \"World\"}"); } void multiset_test() { @@ -662,6 +665,25 @@ void streampos_test() { ComparePrettyPrintToRegex(test1, "^std::fpos with stream offset:5( with state: {count:0 value:0})?$"); } +void mi_mode_test() { + std::map one_two_three_map; + one_two_three_map.insert({1, "one"}); + one_two_three_map.insert({2, "two"}); + one_two_three_map.insert({3, "three"}); + CompareListChildrenToChars( + one_two_three_map, R"([{"key": 1, "value": "one"}, {"key": 2, "value": "two"}, {"key": 3, "value": "three"}])"); + + std::unordered_map one_two_three_umap; + one_two_three_umap.insert({3, "three"}); + one_two_three_umap.insert({2, "two"}); + one_two_three_umap.insert({1, "one"}); + CompareListChildrenToChars( + one_two_three_umap, R"([{"key": 3, "value": "three"}, {"key": 2, "value": "two"}, {"key": 1, "value": "one"}])"); + + std::deque one_two_three_deque{1, 2, 3}; + CompareListChildrenToChars(one_two_three_deque, "[1, 2, 3]"); +} + int main(int, char**) { framework_self_test(); @@ -694,5 +716,6 @@ int main(int, char**) { unordered_set_iterator_test(); pointer_negative_test(); streampos_test(); + mi_mode_test(); return 0; } diff --git a/libcxx/utils/gdb/libcxx/printers.py b/libcxx/utils/gdb/libcxx/printers.py index 8e74b93e7b121..31c27a1959cb2 100644 --- a/libcxx/utils/gdb/libcxx/printers.py +++ b/libcxx/utils/gdb/libcxx/printers.py @@ -446,10 +446,13 @@ def _list_it(self): num_emitted = 0 current_addr = self.start_ptr start_index = self.first_block_start_index + i = 0 while num_emitted < self.size: end_index = min(start_index + self.size - num_emitted, self.block_size) for _, elem in self._bucket_it(current_addr, start_index, end_index): - yield "", elem + key_name = "[%d]" % i + i += 1 + yield key_name, elem num_emitted += end_index - start_index current_addr = gdb.Value(addr_as_long(current_addr) + _pointer_size).cast( self.node_type @@ -494,8 +497,8 @@ def to_string(self): def _list_iter(self): current_node = self.first_node - for _ in range(self.size): - yield "", current_node.cast(self.nodetype).dereference()["__value_"] + for i in range(self.size): + yield "[%d]" % i, current_node.cast(self.nodetype).dereference()["__value_"] current_node = current_node.dereference()["__next_"] def __iter__(self): @@ -512,15 +515,14 @@ class StdQueueOrStackPrinter(object): """Print a std::queue or std::stack.""" def __init__(self, val): - self.val = val - self.underlying = val["c"] + self.typename = _remove_generics(_prettify_typename(val.type)) + self.visualizer = gdb.default_visualizer(val["c"]) def to_string(self): - typename = _remove_generics(_prettify_typename(self.val.type)) - return "%s wrapping" % typename + return "%s wrapping: %s" % (self.typename, self.visualizer.to_string()) def children(self): - return iter([("", self.underlying)]) + return self.visualizer.children() def display_hint(self): return "array" @@ -530,19 +532,18 @@ class StdPriorityQueuePrinter(object): """Print a std::priority_queue.""" def __init__(self, val): - self.val = val - self.underlying = val["c"] + self.typename = _remove_generics(_prettify_typename(val.type)) + self.visualizer = gdb.default_visualizer(val["c"]) def to_string(self): # TODO(tamur): It would be nice to print the top element. The technical # difficulty is that, the implementation refers to the underlying # container, which is a generic class. libstdcxx pretty printers do not # print the top element. - typename = _remove_generics(_prettify_typename(self.val.type)) - return "%s wrapping" % typename + return "%s wrapping: %s" % (self.typename, self.visualizer.to_string()) def children(self): - return iter([("", self.underlying)]) + return self.visualizer.children() def display_hint(self): return "array" @@ -622,13 +623,16 @@ def _traverse(self): """Traverses the binary search tree in order.""" current = self.util.root skip_left_child = False + i = 0 while True: if not skip_left_child and self.util.left_child(current): current = self.util.left_child(current) continue skip_left_child = False for key_value in self._get_key_value(current): - yield "", key_value + key_name = "[%d]" % i + i += 1 + yield key_name, key_value right_child = self.util.right_child(current) if right_child: current = right_child @@ -784,10 +788,13 @@ def __init__(self, val): def _list_it(self, sentinel_ptr): next_ptr = sentinel_ptr["__next_"] + i = 0 while str(next_ptr.cast(_void_pointer_type)) != "0x0": next_val = next_ptr.cast(self.cast_type).dereference() for key_value in self._get_key_value(next_val): - yield "", key_value + key_name = "[%d]" % i + i += 1 + yield key_name, key_value next_ptr = next_val["__next_"] def to_string(self): @@ -851,8 +858,8 @@ def children(self): return self if self.addr else iter(()) def __iter__(self): - for key_value in self._get_key_value(): - yield "", key_value + for i, key_value in enumerate(self._get_key_value()): + yield "[%d]" % i, key_value class StdUnorderedSetIteratorPrinter(AbstractHashMapIteratorPrinter): From 7724be972858477d9d4552f5fa2edb5222bff9e0 Mon Sep 17 00:00:00 2001 From: Andrea Faulds Date: Thu, 9 Jan 2025 17:58:51 +0100 Subject: [PATCH 08/79] [mlir][spirv] Do SPIR-V serialization in -test-vulkan-runner-pipeline (#121494) This commit is a further incremental step toward moving the whole mlir-vulkan-runner MLIR pass pipeline into mlir-opt (see #73457). The previous step was b225b3adf7b78387c9fcb97a3ff0e0a1e26eafe2, which moved all device passes prior to SPIR-V serialization into a new mlir-opt test pass, `-test-vulkan-runner-pipeline`. This commit changes how SPIR-V serialization is accomplished for Vulkan runner tests. Until now, this was done by the Vulkan-specific ConvertGpuLaunchFuncToVulkanLaunchFunc pass. With this commit, this responsibility is removed from that pass, and is instead done with the existing generic GpuModuleToBinaryPass. In addition, the SPIR-V serialization step is no longer done inside mlir-vulkan-runner, but rather inside mlir-opt (in the `-test-vulkan-runner-pipeline` pass). Both of these changes represent a greater alignment between mlir-vulkan-runner and the other GPU integration tests. Notably, the IR shapes produced by the mlir-opt pipelines for the Vulkan and SYCL runners are now much more similar, with both using a gpu.binary op for the serialized SPIR-V kernel. In order to enable this, this commit includes these supporting changes: - ConvertToSPIRVPass is enhanced to support producing the IR shape where a spirv.module is nested inside a gpu.module, since this is what GpuModuleToBinaryPass expects. - ConvertGPULaunchFuncToVulkanLaunchFunc is changed to remove its SPIR-V serialization functionality, and instead now extracts the SPIR-V from a gpu.binary operation (as produced by ConvertToSPIRVPass). - `-test-vulkan-runner-pipeline` now attaches SPIR-V target information required by GpuModuleToBinaryPass. - The WebGPU pass option, which had been removed from mlir-vulkan-runner in the previous commit in this series, is restored as an option to `-test-vulkan-runner-pipeline` instead, so that the WebGPU pass continues being inserted into the pipeline just before SPIR-V serialization. --- mlir/include/mlir/Conversion/Passes.td | 5 +- .../ConvertToSPIRV/ConvertToSPIRVPass.cpp | 5 +- .../Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp | 2 +- ...ConvertGPULaunchFuncToVulkanLaunchFunc.cpp | 64 +++++++++++-------- .../convert-gpu-modules-nested.mlir | 30 +++++++++ .../lower-gpu-launch-vulkan-launch.mlir | 28 ++++---- .../lib/Pass/TestVulkanRunnerPipeline.cpp | 34 ++++++++-- .../mlir-vulkan-runner/addui_extended.mlir | 2 +- .../mlir-vulkan-runner/smul_extended.mlir | 2 +- .../mlir-vulkan-runner/umul_extended.mlir | 2 +- 10 files changed, 121 insertions(+), 53 deletions(-) create mode 100644 mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules-nested.mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 58ee87cf82039..afeed370ce347 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -61,7 +61,10 @@ def ConvertToSPIRVPass : Pass<"convert-to-spirv"> { "Run vector unrolling to convert vector types in function bodies">, Option<"convertGPUModules", "convert-gpu-modules", "bool", /*default=*/"false", - "Clone and convert GPU modules"> + "Clone and convert GPU modules">, + Option<"nestInGPUModule", "nest-in-gpu-module", "bool", + /*default=*/"false", + "Put converted SPIR-V module inside the gpu.module instead of alongside it.">, ]; } diff --git a/mlir/lib/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.cpp b/mlir/lib/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.cpp index 4b7f7ff114dee..ab9c048f56106 100644 --- a/mlir/lib/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.cpp +++ b/mlir/lib/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.cpp @@ -108,7 +108,10 @@ struct ConvertToSPIRVPass final SmallVector gpuModules; OpBuilder builder(context); op->walk([&](gpu::GPUModuleOp gpuModule) { - builder.setInsertionPoint(gpuModule); + if (nestInGPUModule) + builder.setInsertionPointToStart(gpuModule.getBody()); + else + builder.setInsertionPoint(gpuModule); gpuModules.push_back(builder.clone(*gpuModule)); }); // Run conversion for each module independently as they can have diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp index 08b451f7d5b32..509b6343057b9 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp @@ -71,7 +71,7 @@ void GPUToSPIRVPass::runOnOperation() { // launch op still needs the original GPU kernel module. // For Vulkan Shader capabilities, we insert the newly converted SPIR-V // module right after the original GPU module, as that's the expectation of - // the in-tree Vulkan runner. + // the in-tree SPIR-V CPU runner (the Vulkan runner does not use this pass). // For OpenCL Kernel capabilities, we insert the newly converted SPIR-V // module inside the original GPU module, as that's the expectaion of the // normal GPU compilation pipeline. diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp index 2d2251672230b..8488fac69e8e3 100644 --- a/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp +++ b/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp @@ -7,9 +7,8 @@ //===----------------------------------------------------------------------===// // // This file implements a pass to convert gpu launch function into a vulkan -// launch function. Creates a SPIR-V binary shader from the `spirv::ModuleOp` -// using `spirv::serialize` function, attaches binary data and entry point name -// as an attributes to vulkan launch call op. +// launch function. Extracts the SPIR-V from a `gpu::BinaryOp` and attaches it +// along with the entry point name as attributes to a Vulkan launch call op. // //===----------------------------------------------------------------------===// @@ -40,10 +39,9 @@ static constexpr const char *kVulkanLaunch = "vulkanLaunch"; namespace { -/// A pass to convert gpu launch op to vulkan launch call op, by creating a -/// SPIR-V binary shader from `spirv::ModuleOp` using `spirv::serialize` -/// function and attaching binary data and entry point name as an attributes to -/// created vulkan launch call op. +/// A pass to convert gpu launch op to vulkan launch call op, by extracting a +/// SPIR-V binary shader from a `gpu::BinaryOp` and attaching binary data and +/// entry point name as an attributes to created vulkan launch call op. class ConvertGpuLaunchFuncToVulkanLaunchFunc : public impl::ConvertGpuLaunchFuncToVulkanLaunchFuncBase< ConvertGpuLaunchFuncToVulkanLaunchFunc> { @@ -51,10 +49,9 @@ class ConvertGpuLaunchFuncToVulkanLaunchFunc void runOnOperation() override; private: - /// Creates a SPIR-V binary shader from the given `module` using - /// `spirv::serialize` function. - LogicalResult createBinaryShader(ModuleOp module, - std::vector &binaryShader); + /// Extracts a SPIR-V binary shader from the given `module`, if any. + /// Note that this also removes the binary from the IR. + FailureOr getBinaryShader(ModuleOp module); /// Converts the given `launchOp` to vulkan launch call. void convertGpuLaunchFunc(gpu::LaunchFuncOp launchOp); @@ -135,22 +132,35 @@ LogicalResult ConvertGpuLaunchFuncToVulkanLaunchFunc::declareVulkanLaunchFunc( return success(); } -LogicalResult ConvertGpuLaunchFuncToVulkanLaunchFunc::createBinaryShader( - ModuleOp module, std::vector &binaryShader) { +FailureOr +ConvertGpuLaunchFuncToVulkanLaunchFunc::getBinaryShader(ModuleOp module) { bool done = false; - SmallVector binary; - for (auto spirvModule : module.getOps()) { + StringAttr binaryAttr; + gpu::BinaryOp binaryToErase; + for (auto gpuBinary : module.getOps()) { if (done) - return spirvModule.emitError("should only contain one 'spirv.module' op"); + return gpuBinary.emitError("should only contain one 'gpu.binary' op"); done = true; - if (failed(spirv::serialize(spirvModule, binary))) - return failure(); + ArrayRef objects = gpuBinary.getObjectsAttr().getValue(); + if (objects.size() != 1) + return gpuBinary.emitError("should only contain a single object"); + + auto object = cast(objects[0]); + + if (!isa(object.getTarget())) + return gpuBinary.emitError( + "should contain an object with a SPIR-V target environment"); + + binaryAttr = object.getObject(); + binaryToErase = gpuBinary; } - binaryShader.resize(binary.size() * sizeof(uint32_t)); - std::memcpy(binaryShader.data(), reinterpret_cast(binary.data()), - binaryShader.size()); - return success(); + if (!done) + return module.emitError("should contain a 'gpu.binary' op"); + + // Remove the binary to avoid confusing later conversion passes. + binaryToErase.erase(); + return binaryAttr; } void ConvertGpuLaunchFuncToVulkanLaunchFunc::convertGpuLaunchFunc( @@ -159,9 +169,9 @@ void ConvertGpuLaunchFuncToVulkanLaunchFunc::convertGpuLaunchFunc( OpBuilder builder(launchOp); Location loc = launchOp.getLoc(); - // Serialize `spirv::Module` into binary form. - std::vector binary; - if (failed(createBinaryShader(module, binary))) + FailureOr binaryAttr = getBinaryShader(module); + // Extract SPIR-V from `gpu.binary` op. + if (failed(binaryAttr)) return signalPassFailure(); // Declare vulkan launch function. @@ -182,9 +192,7 @@ void ConvertGpuLaunchFuncToVulkanLaunchFunc::convertGpuLaunchFunc( vulkanLaunchOperands); // Set SPIR-V binary shader data as an attribute. - vulkanLaunchCallOp->setAttr( - kSPIRVBlobAttrName, - builder.getStringAttr(StringRef(binary.data(), binary.size()))); + vulkanLaunchCallOp->setAttr(kSPIRVBlobAttrName, *binaryAttr); // Set entry point name as an attribute. vulkanLaunchCallOp->setAttr(kSPIRVEntryPointAttrName, diff --git a/mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules-nested.mlir b/mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules-nested.mlir new file mode 100644 index 0000000000000..7562de17c606e --- /dev/null +++ b/mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules-nested.mlir @@ -0,0 +1,30 @@ +// RUN: mlir-opt -convert-to-spirv="convert-gpu-modules=true nest-in-gpu-module=true run-signature-conversion=false run-vector-unrolling=false" %s | FileCheck %s + +module attributes { + gpu.container_module, + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> +} { + // CHECK-LABEL: func.func @main + // CHECK: %[[C1:.*]] = arith.constant 1 : index + // CHECK: gpu.launch_func @[[$KERNELS_1:.*]]::@[[$BUILTIN_WG_ID_X:.*]] blocks in (%[[C1]], %[[C1]], %[[C1]]) threads in (%[[C1]], %[[C1]], %[[C1]]) + func.func @main() { + %c1 = arith.constant 1 : index + gpu.launch_func @kernels_1::@builtin_workgroup_id_x + blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1) + return + } + + // CHECK: gpu.module @[[$KERNELS_1]] + // CHECK: spirv.module @{{.*}} Logical GLSL450 + // CHECK: spirv.func @[[$BUILTIN_WG_ID_X]] + // CHECK: spirv.mlir.addressof + // CHECK: spirv.Load "Input" + // CHECK: spirv.CompositeExtract + gpu.module @kernels_1 { + gpu.func @builtin_workgroup_id_x() kernel + attributes {spirv.entry_point_abi = #spirv.entry_point_abi} { + %0 = gpu.block_id x + gpu.return + } + } +} diff --git a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir index 665d0a33abedc..96ee1866517e6 100644 --- a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir +++ b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir @@ -1,24 +1,24 @@ -// RUN: mlir-opt %s -convert-gpu-launch-to-vulkan-launch | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Shader exts=SPV_KHR_storage_buffer_storage_class},gpu-module-to-binary,convert-gpu-launch-to-vulkan-launch)' | FileCheck %s // CHECK: %[[resource:.*]] = memref.alloc() : memref<12xf32> // CHECK: %[[index:.*]] = arith.constant 1 : index // CHECK: call @vulkanLaunch(%[[index]], %[[index]], %[[index]], %[[resource]]) {spirv_blob = "{{.*}}", spirv_element_types = [f32], spirv_entry_point = "kernel"} module attributes {gpu.container_module} { - spirv.module Logical GLSL450 requires #spirv.vce { - spirv.GlobalVariable @kernel_arg_0 bind(0, 0) : !spirv.ptr [0])>, StorageBuffer> - spirv.func @kernel() "None" attributes {workgroup_attributions = 0 : i64} { - %0 = spirv.mlir.addressof @kernel_arg_0 : !spirv.ptr [0])>, StorageBuffer> - %2 = spirv.Constant 0 : i32 - %3 = spirv.mlir.addressof @kernel_arg_0 : !spirv.ptr [0])>, StorageBuffer> - %4 = spirv.AccessChain %0[%2, %2] : !spirv.ptr [0])>, StorageBuffer>, i32, i32 -> !spirv.ptr - %5 = spirv.Load "StorageBuffer" %4 : f32 - spirv.Return - } - spirv.EntryPoint "GLCompute" @kernel - spirv.ExecutionMode @kernel "LocalSize", 1, 1, 1 - } gpu.module @kernels { + spirv.module Logical GLSL450 requires #spirv.vce { + spirv.GlobalVariable @kernel_arg_0 bind(0, 0) : !spirv.ptr [0])>, StorageBuffer> + spirv.func @kernel() "None" attributes {workgroup_attributions = 0 : i64} { + %0 = spirv.mlir.addressof @kernel_arg_0 : !spirv.ptr [0])>, StorageBuffer> + %2 = spirv.Constant 0 : i32 + %3 = spirv.mlir.addressof @kernel_arg_0 : !spirv.ptr [0])>, StorageBuffer> + %4 = spirv.AccessChain %0[%2, %2] : !spirv.ptr [0])>, StorageBuffer>, i32, i32 -> !spirv.ptr + %5 = spirv.Load "StorageBuffer" %4 : f32 + spirv.Return + } + spirv.EntryPoint "GLCompute" @kernel + spirv.ExecutionMode @kernel "LocalSize", 1, 1, 1 + } gpu.func @kernel(%arg0: memref<12xf32>) kernel { gpu.return } diff --git a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp index eda9aa9f9efef..789c4d76cee0d 100644 --- a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp +++ b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp @@ -12,33 +12,57 @@ #include "mlir/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.h" #include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/Dialect/MemRef/Transforms/Passes.h" #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" #include "mlir/Dialect/SPIRV/Transforms/Passes.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Pass/PassOptions.h" using namespace mlir; namespace { -void buildTestVulkanRunnerPipeline(OpPassManager &passManager) { +struct VulkanRunnerPipelineOptions + : PassPipelineOptions { + Option spirvWebGPUPrepare{ + *this, "spirv-webgpu-prepare", + llvm::cl::desc("Run MLIR transforms used when targetting WebGPU")}; +}; + +void buildTestVulkanRunnerPipeline(OpPassManager &passManager, + const VulkanRunnerPipelineOptions &options) { passManager.addPass(createGpuKernelOutliningPass()); passManager.addPass(memref::createFoldMemRefAliasOpsPass()); + GpuSPIRVAttachTargetOptions attachTargetOptions{}; + attachTargetOptions.spirvVersion = "v1.0"; + attachTargetOptions.spirvCapabilities.push_back("Shader"); + attachTargetOptions.spirvExtensions.push_back( + "SPV_KHR_storage_buffer_storage_class"); + passManager.addPass(createGpuSPIRVAttachTarget(attachTargetOptions)); + ConvertToSPIRVPassOptions convertToSPIRVOptions{}; convertToSPIRVOptions.convertGPUModules = true; + convertToSPIRVOptions.nestInGPUModule = true; passManager.addPass(createConvertToSPIRVPass(convertToSPIRVOptions)); - OpPassManager &modulePM = passManager.nest(); - modulePM.addPass(spirv::createSPIRVLowerABIAttributesPass()); - modulePM.addPass(spirv::createSPIRVUpdateVCEPass()); + + OpPassManager &spirvModulePM = + passManager.nest().nest(); + spirvModulePM.addPass(spirv::createSPIRVLowerABIAttributesPass()); + spirvModulePM.addPass(spirv::createSPIRVUpdateVCEPass()); + if (options.spirvWebGPUPrepare) + spirvModulePM.addPass(spirv::createSPIRVWebGPUPreparePass()); + + passManager.addPass(createGpuModuleToBinaryPass()); } } // namespace namespace mlir::test { void registerTestVulkanRunnerPipeline() { - PassPipelineRegistration<>( + PassPipelineRegistration( "test-vulkan-runner-pipeline", "Runs a series of passes for lowering GPU-dialect MLIR to " "SPIR-V-dialect MLIR intended for mlir-vulkan-runner.", diff --git a/mlir/test/mlir-vulkan-runner/addui_extended.mlir b/mlir/test/mlir-vulkan-runner/addui_extended.mlir index 158541f326be7..b8db451421459 100644 --- a/mlir/test/mlir-vulkan-runner/addui_extended.mlir +++ b/mlir/test/mlir-vulkan-runner/addui_extended.mlir @@ -6,7 +6,7 @@ // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \ // RUN: --entry-point-result=void | FileCheck %s -// RUN: mlir-opt %s -test-vulkan-runner-pipeline -spirv-webgpu-prepare \ +// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \ // RUN: | mlir-vulkan-runner - \ // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \ // RUN: --entry-point-result=void | FileCheck %s diff --git a/mlir/test/mlir-vulkan-runner/smul_extended.mlir b/mlir/test/mlir-vulkan-runner/smul_extended.mlir index 2dd31d2ebb9a0..334aec843e197 100644 --- a/mlir/test/mlir-vulkan-runner/smul_extended.mlir +++ b/mlir/test/mlir-vulkan-runner/smul_extended.mlir @@ -6,7 +6,7 @@ // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \ // RUN: --entry-point-result=void | FileCheck %s -// RUN: mlir-opt %s -test-vulkan-runner-pipeline -spirv-webgpu-prepare \ +// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \ // RUN: | mlir-vulkan-runner - \ // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \ // RUN: --entry-point-result=void | FileCheck %s diff --git a/mlir/test/mlir-vulkan-runner/umul_extended.mlir b/mlir/test/mlir-vulkan-runner/umul_extended.mlir index 78300d2fd81dd..803b8c3d336d3 100644 --- a/mlir/test/mlir-vulkan-runner/umul_extended.mlir +++ b/mlir/test/mlir-vulkan-runner/umul_extended.mlir @@ -6,7 +6,7 @@ // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \ // RUN: --entry-point-result=void | FileCheck %s -// RUN: mlir-opt %s -test-vulkan-runner-pipeline -spirv-webgpu-prepare \ +// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \ // RUN: | mlir-vulkan-runner - \ // RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \ // RUN: --entry-point-result=void | FileCheck %s From 89499c666819d88c8abe85b58005fc8e5eb5f03f Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 9 Jan 2025 08:59:05 -0800 Subject: [PATCH 09/79] [BoundsChecking][test] Reorder RUN lines (#122229) To match output order of update_test_checks.py. --- .../BoundsChecking/runtimes.ll | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll index 7b95a4092af77..d61627287a31a 100644 --- a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll +++ b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll @@ -1,4 +1,7 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt < %s -passes='bounds-checking' -S | FileCheck %s --check-prefixes=TR +; RUN: opt < %s -passes='bounds-checking' -S | FileCheck %s --check-prefixes=RT + ; RUN: opt < %s -passes=bounds-checking -S | FileCheck %s --check-prefixes=TR-NOMERGE ; RUN: opt < %s -passes='bounds-checking' -S | FileCheck %s --check-prefixes=TR-NOMERGE ; RUN: opt < %s -passes='bounds-checking' -S | FileCheck %s --check-prefixes=RT-NOMERGE @@ -129,11 +132,28 @@ define void @f1(i64 %x) nounwind { ret void } +;. +; TR: attributes #[[ATTR0]] = { nounwind } +; TR: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) } +; TR: attributes #[[ATTR2]] = { noreturn nounwind } +;. +; RT: attributes #[[ATTR0]] = { nounwind } +;. +; TR-NOMERGE: attributes #[[ATTR0]] = { nounwind } +; TR-NOMERGE: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind } ; TR-NOMERGE: attributes #[[ATTR2]] = { nomerge noreturn nounwind } +;. +; RT-NOMERGE: attributes #[[ATTR0]] = { nounwind } ; RT-NOMERGE: attributes #[[ATTR1]] = { nomerge nounwind } +;. +; RTABORT-NOMERGE: attributes #[[ATTR0]] = { nounwind } +; RTABORT-NOMERGE: attributes #[[ATTR1:[0-9]+]] = { noreturn nounwind } ; RTABORT-NOMERGE: attributes #[[ATTR2]] = { nomerge noreturn nounwind } +;. +; MINRT-NOMERGE: attributes #[[ATTR0]] = { nounwind } ; MINRT-NOMERGE: attributes #[[ATTR1]] = { nomerge nounwind } +;. +; MINRTABORT-NOMERGE: attributes #[[ATTR0]] = { nounwind } +; MINRTABORT-NOMERGE: attributes #[[ATTR1:[0-9]+]] = { noreturn nounwind } ; MINRTABORT-NOMERGE: attributes #[[ATTR2]] = { nomerge noreturn nounwind } -; -; TR: attributes #[[ATTR2]] = { noreturn nounwind } -; RT: attributes #[[ATTR0]] = { nounwind } +;. From a7cbd41000f55c0475bbfb42a2bb7af418c47a35 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 9 Jan 2025 09:00:50 -0800 Subject: [PATCH 10/79] [BoundsChecking][test] Remove duplicate RUNS Fixing failed conflict resolution in #122229. --- llvm/test/Instrumentation/BoundsChecking/runtimes.ll | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll index d61627287a31a..6695e0fa549fa 100644 --- a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll +++ b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll @@ -9,9 +9,6 @@ ; RUN: opt < %s -passes='bounds-checking' -S | FileCheck %s --check-prefixes=MINRT-NOMERGE ; RUN: opt < %s -passes='bounds-checking' -S | FileCheck %s --check-prefixes=MINRTABORT-NOMERGE ; -; RUN: opt < %s -passes='bounds-checking' -S | FileCheck %s --check-prefixes=TR -; RUN: opt < %s -passes='bounds-checking' -S | FileCheck %s --check-prefixes=RT - target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" define void @f1(i64 %x) nounwind { From e2449f1bceeefd4a603cae024a7a1db763df6834 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 9 Jan 2025 09:09:55 -0800 Subject: [PATCH 11/79] [SelectionDAG] Use SDNode::op_iterator instead of SDNodeIterator. NFC (#122147) I think SDNodeIterator primarily exists because GraphTraits requires an iterator that dereferences to SDNode*. op_iterator dereferences to SDUse* which is implicitly convertible to SDValue. This piece of code can use SDValue instead of SDNode* so we should prefer to use the the more common op_iterator. --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9f57884eae04d..56194e2614af2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2985,13 +2985,11 @@ bool TargetLowering::SimplifyDemandedBits( if (!isTargetCanonicalConstantNode(Op) && DemandedBits.isSubsetOf(Known.Zero | Known.One)) { // Avoid folding to a constant if any OpaqueConstant is involved. - const SDNode *N = Op.getNode(); - for (SDNode *Op : - llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) { - if (auto *C = dyn_cast(Op)) - if (C->isOpaque()) - return false; - } + if (llvm::any_of(Op->ops(), [](SDValue V) { + auto *C = dyn_cast(V); + return C && C->isOpaque(); + })) + return false; if (VT.isInteger()) return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); if (VT.isFloatingPoint()) From c87ef146e1ceea3ebfcd0f59266043d53affced0 Mon Sep 17 00:00:00 2001 From: Mikhail Gudim Date: Thu, 9 Jan 2025 12:15:20 -0500 Subject: [PATCH 12/79] [InstCombine][NFC] Precommit a test for folding a binary op of reductions. (#121568) --- .../InstCombine/fold-binop-of-reductions.ll | 215 ++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll diff --git a/llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll b/llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll new file mode 100644 index 0000000000000..86f17cdfb79b4 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll @@ -0,0 +1,215 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i32 @add_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) { +; CHECK-LABEL: define i32 @add_of_reduce_add( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1) + %res = add i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @sub_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) { +; CHECK-LABEL: define i32 @sub_of_reduce_add( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sub <16 x i32> [[V0]], [[V1]] +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1) + %res = sub i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @mul_of_reduce_mul(<16 x i32> %v0, <16 x i32> %v1) { +; CHECK-LABEL: define i32 @mul_of_reduce_mul( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = mul i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1) + %res = mul i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @and_of_reduce_and(<16 x i32> %v0, <16 x i32> %v1) { +; CHECK-LABEL: define i32 @and_of_reduce_and( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = and i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v1) + %res = and i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @or_of_reduce_or(<16 x i32> %v0, <16 x i32> %v1) { +; CHECK-LABEL: define i32 @or_of_reduce_or( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = or i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v1) + %res = or i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @xor_of_reduce_xor(<16 x i32> %v0, <16 x i32> %v1) { +; CHECK-LABEL: define i32 @xor_of_reduce_xor( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = xor i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v1) + %res = xor i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @reduction_does_not_match_binop(<16 x i32> %v0, <16 x i32> %v1) { +; CHECK-LABEL: define i32 @reduction_does_not_match_binop( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1) + %res = add i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @intrinsics_do_not_match(<16 x i32> %v0, <16 x i32> %v1) { +; CHECK-LABEL: define i32 @intrinsics_do_not_match( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1) + %res = add i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @element_counts_do_not_match(<16 x i32> %v0, <8 x i32> %v1) { +; CHECK-LABEL: define i32 @element_counts_do_not_match( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v1) + %res = add i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @multiple_use_of_reduction_0(<16 x i32> %v0, <16 x i32> %v1, ptr %p) { +; CHECK-LABEL: define i32 @multiple_use_of_reduction_0( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: store i32 [[V0_RED]], ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1) + %res = add i32 %v0_red, %v1_red + store i32 %v0_red, ptr %p + ret i32 %res +} + +define i32 @multiple_use_of_reduction_1(<16 x i32> %v0, <16 x i32> %v1, ptr %p) { +; CHECK-LABEL: define i32 @multiple_use_of_reduction_1( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: store i32 [[V1_RED]], ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1) + %res = add i32 %v0_red, %v1_red + store i32 %v1_red, ptr %p + ret i32 %res +} + +define i32 @do_not_preserve_overflow_flags(<16 x i32> %v0, <16 x i32> %v1) { +; CHECK-LABEL: define i32 @do_not_preserve_overflow_flags( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = add nuw nsw i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1) + %res = add nsw nuw i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @preserve_disjoint_flags(<16 x i32> %v0, <16 x i32> %v1) { +; CHECK-LABEL: define i32 @preserve_disjoint_flags( +; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = or disjoint i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0) + %v1_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v1) + %res = or disjoint i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @add_of_reduce_add_vscale( %v0, %v1) { +; CHECK-LABEL: define i32 @add_of_reduce_add_vscale( +; CHECK-SAME: [[V0:%.*]], [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32( [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32( [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.add.nxv16i32( %v0) + %v1_red = tail call i32 @llvm.vector.reduce.add.nxv16i32( %v1) + %res = add i32 %v0_red, %v1_red + ret i32 %res +} + +define i32 @element_counts_do_not_match_vscale( %v0, %v1) { +; CHECK-LABEL: define i32 @element_counts_do_not_match_vscale( +; CHECK-SAME: [[V0:%.*]], [[V1:%.*]]) { +; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32( [[V0]]) +; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv8i32( [[V1]]) +; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v0_red = tail call i32 @llvm.vector.reduce.add.nxv16i32( %v0) + %v1_red = tail call i32 @llvm.vector.reduce.add.nxv16i32( %v1) + %res = add i32 %v0_red, %v1_red + ret i32 %res +} From f8f8598fd886cddfd374fa43eb6d7d37d301b576 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Thu, 9 Jan 2025 09:25:51 -0800 Subject: [PATCH 13/79] [Exegesis] Add the ability to dry-run the measurement phase (#121991) With the new benchmark phase, `dry-run-measurement`, llvm-exegesis can run everything except the actual snippet execution. It is useful when we want to test some parts of the code between the `assemble-measured-code` and `measure` phase without actually running on native platforms. --- llvm/docs/CommandGuide/llvm-exegesis.rst | 1 + .../llvm-exegesis/dry-run-measurement.test | 11 +++++++ .../tools/llvm-exegesis/lib/BenchmarkResult.h | 1 + .../llvm-exegesis/lib/BenchmarkRunner.cpp | 33 ++++++++++++++----- llvm/tools/llvm-exegesis/lib/Target.cpp | 4 +-- llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 9 +++-- 6 files changed, 46 insertions(+), 13 deletions(-) create mode 100644 llvm/test/tools/llvm-exegesis/dry-run-measurement.test diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst index 8266d891a5e6b..d357c2ceea418 100644 --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -301,6 +301,7 @@ OPTIONS * ``prepare-and-assemble-snippet``: Same as ``prepare-snippet``, but also dumps an excerpt of the sequence (hex encoded). * ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``. but also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``. * ``measure``: Same as ``assemble-measured-code``, but also runs the measurement. + * ``dry-run-measurement``: Same as measure, but does not actually execute the snippet. .. option:: --x86-lbr-sample-period= diff --git a/llvm/test/tools/llvm-exegesis/dry-run-measurement.test b/llvm/test/tools/llvm-exegesis/dry-run-measurement.test new file mode 100644 index 0000000000000..e4449d7df3d82 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/dry-run-measurement.test @@ -0,0 +1,11 @@ +# RUN: llvm-exegesis --mtriple=riscv64 --mcpu=sifive-p470 --mode=latency --opcode-name=ADD --use-dummy-perf-counters --benchmark-phase=dry-run-measurement | FileCheck %s +# REQUIRES: riscv-registered-target + +# This test makes sure that llvm-exegesis doesn't execute "cross-compiled" snippets in the presence of +# --dry-run-measurement. RISC-V was chosen simply because most of the time we run tests on X86 machines. + +# Should not contain misleading results. +# CHECK: measurements: [] + +# Should not contain error messages like "snippet crashed while running: Segmentation fault". +# CHECK: error: '' diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h index 3c09a8380146e..5480d85616878 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -38,6 +38,7 @@ enum class BenchmarkPhaseSelectorE { PrepareAndAssembleSnippet, AssembleMeasuredCode, Measure, + DryRunMeasure, }; enum class BenchmarkFilter { All, RegOnly, WithMem }; diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp index a7771b99e97b1..cc46f7feb6cf7 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -99,7 +99,7 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { static Expected> create(const LLVMState &State, object::OwningBinary Obj, BenchmarkRunner::ScratchSpace *Scratch, - std::optional BenchmarkProcessCPU) { + std::optional BenchmarkProcessCPU, bool DryRun) { Expected EF = ExecutableFunction::create(State.createTargetMachine(), std::move(Obj)); @@ -107,14 +107,17 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { return EF.takeError(); return std::unique_ptr( - new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch)); + new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch, + DryRun)); } private: InProcessFunctionExecutorImpl(const LLVMState &State, ExecutableFunction Function, - BenchmarkRunner::ScratchSpace *Scratch) - : State(State), Function(std::move(Function)), Scratch(Scratch) {} + BenchmarkRunner::ScratchSpace *Scratch, + bool DryRun) + : State(State), Function(std::move(Function)), Scratch(Scratch), + DryRun(DryRun) {} static void accumulateCounterValues(const SmallVector &NewValues, SmallVector *Result) { @@ -143,9 +146,14 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { CrashRecoveryContext CRC; CrashRecoveryContext::Enable(); const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() { - Counter->start(); - this->Function(ScratchPtr); - Counter->stop(); + if (DryRun) { + Counter->start(); + Counter->stop(); + } else { + Counter->start(); + this->Function(ScratchPtr); + Counter->stop(); + } }); CrashRecoveryContext::Disable(); PS.reset(); @@ -177,6 +185,7 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { const LLVMState &State; const ExecutableFunction Function; BenchmarkRunner::ScratchSpace *const Scratch; + bool DryRun = false; }; #ifdef __linux__ @@ -664,6 +673,9 @@ Expected> BenchmarkRunner::createFunctionExecutor( object::OwningBinary ObjectFile, const BenchmarkKey &Key, std::optional BenchmarkProcessCPU) const { + bool DryRun = + BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::DryRunMeasure; + switch (ExecutionMode) { case ExecutionModeE::InProcess: { if (BenchmarkProcessCPU.has_value()) @@ -671,7 +683,8 @@ BenchmarkRunner::createFunctionExecutor( "support benchmark core pinning."); auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create( - State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU); + State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU, + DryRun); if (!InProcessExecutorOrErr) return InProcessExecutorOrErr.takeError(); @@ -679,6 +692,10 @@ BenchmarkRunner::createFunctionExecutor( } case ExecutionModeE::SubProcess: { #ifdef __linux__ + if (DryRun) + return make_error("The subprocess execution mode cannot " + "dry-run measurement at this moment."); + auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create( State, std::move(ObjectFile), Key, BenchmarkProcessCPU); if (!SubProcessExecutorOrErr) diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp index 29e58692f0e92..e2251ff978888 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/Target.cpp @@ -98,7 +98,7 @@ ExegesisTarget::createBenchmarkRunner( return nullptr; case Benchmark::Latency: case Benchmark::InverseThroughput: - if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure && + if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure && !PfmCounters.CycleCounter) { const char *ModeName = Mode == Benchmark::Latency ? "latency" @@ -116,7 +116,7 @@ ExegesisTarget::createBenchmarkRunner( State, Mode, BenchmarkPhaseSelector, ResultAggMode, ExecutionMode, ValidationCounters, BenchmarkRepeatCount); case Benchmark::Uops: - if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure && + if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure && !PfmCounters.UopsCounter && !PfmCounters.IssueCounters) return make_error( "can't run 'uops' mode, sched model does not define uops or issue " diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index fa37e05956be8..07bd44ee64f1f 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -132,7 +132,10 @@ static cl::opt BenchmarkPhaseSelector( clEnumValN( BenchmarkPhaseSelectorE::Measure, "measure", "Same as prepare-measured-code, but also runs the measurement " - "(default)")), + "(default)"), + clEnumValN( + BenchmarkPhaseSelectorE::DryRunMeasure, "dry-run-measurement", + "Same as measure, but does not actually execute the snippet")), cl::init(BenchmarkPhaseSelectorE::Measure)); static cl::opt @@ -476,7 +479,7 @@ static void runBenchmarkConfigurations( } void benchmarkMain() { - if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure && + if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure && !UseDummyPerfCounters) { #ifndef HAVE_LIBPFM ExitWithError( @@ -501,7 +504,7 @@ void benchmarkMain() { // Preliminary check to ensure features needed for requested // benchmark mode are present on target CPU and/or OS. - if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure) + if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure) ExitOnErr(State.getExegesisTarget().checkFeatureSupport()); if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess && From be32621ce8cbffe674c62e87c0c51c9fc4a21e5f Mon Sep 17 00:00:00 2001 From: erichkeane Date: Thu, 9 Jan 2025 07:15:05 -0800 Subject: [PATCH 14/79] [OpenACC] Implement 'device' and 'host' clauses for 'update' These two clauses just take a 'var-list' and specify where the variables should be copied from/to. This patch implements the AST nodes for them and ensures they properly take a var-list. --- clang/include/clang/AST/OpenACCClause.h | 46 +++++++++++ clang/include/clang/Basic/OpenACCClauses.def | 2 + clang/include/clang/Sema/SemaOpenACC.h | 6 ++ clang/lib/AST/OpenACCClause.cpp | 38 +++++++++- clang/lib/AST/StmtProfile.cpp | 9 +++ clang/lib/AST/TextNodeDumper.cpp | 2 + clang/lib/Parse/ParseOpenACC.cpp | 7 +- clang/lib/Sema/SemaOpenACC.cpp | 38 ++++++++++ clang/lib/Sema/TreeTransform.h | 24 ++++++ clang/lib/Serialization/ASTReader.cpp | 14 +++- clang/lib/Serialization/ASTWriter.cpp | 14 +++- .../ast-print-openacc-update-construct.cpp | 6 ++ clang/test/ParserOpenACC/parse-clauses.c | 16 ++-- ...d-construct-auto_seq_independent-clauses.c | 24 +++--- .../combined-construct-device_type-clause.c | 6 +- .../compute-construct-device_type-clause.c | 6 +- ...p-construct-auto_seq_independent-clauses.c | 24 +++--- .../loop-construct-device_type-clause.c | 6 +- .../test/SemaOpenACC/update-construct-ast.cpp | 76 +++++++++++++++++++ clang/test/SemaOpenACC/update-construct.cpp | 40 ++++++---- clang/tools/libclang/CIndex.cpp | 8 ++ 21 files changed, 345 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h index 4e4dd3447926e..f5be54bdada8b 100644 --- a/clang/include/clang/AST/OpenACCClause.h +++ b/clang/include/clang/AST/OpenACCClause.h @@ -965,6 +965,52 @@ class OpenACCPresentClause final Create(const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc, ArrayRef VarList, SourceLocation EndLoc); }; +class OpenACCHostClause final + : public OpenACCClauseWithVarList, + private llvm::TrailingObjects { + friend TrailingObjects; + + OpenACCHostClause(SourceLocation BeginLoc, SourceLocation LParenLoc, + ArrayRef VarList, SourceLocation EndLoc) + : OpenACCClauseWithVarList(OpenACCClauseKind::Host, BeginLoc, LParenLoc, + EndLoc) { + std::uninitialized_copy(VarList.begin(), VarList.end(), + getTrailingObjects()); + setExprs(MutableArrayRef(getTrailingObjects(), VarList.size())); + } + +public: + static bool classof(const OpenACCClause *C) { + return C->getClauseKind() == OpenACCClauseKind::Host; + } + static OpenACCHostClause *Create(const ASTContext &C, SourceLocation BeginLoc, + SourceLocation LParenLoc, + ArrayRef VarList, + SourceLocation EndLoc); +}; + +class OpenACCDeviceClause final + : public OpenACCClauseWithVarList, + private llvm::TrailingObjects { + friend TrailingObjects; + + OpenACCDeviceClause(SourceLocation BeginLoc, SourceLocation LParenLoc, + ArrayRef VarList, SourceLocation EndLoc) + : OpenACCClauseWithVarList(OpenACCClauseKind::Device, BeginLoc, LParenLoc, + EndLoc) { + std::uninitialized_copy(VarList.begin(), VarList.end(), + getTrailingObjects()); + setExprs(MutableArrayRef(getTrailingObjects(), VarList.size())); + } + +public: + static bool classof(const OpenACCClause *C) { + return C->getClauseKind() == OpenACCClauseKind::Device; + } + static OpenACCDeviceClause * + Create(const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc, + ArrayRef VarList, SourceLocation EndLoc); +}; class OpenACCCopyClause final : public OpenACCClauseWithVarList, diff --git a/clang/include/clang/Basic/OpenACCClauses.def b/clang/include/clang/Basic/OpenACCClauses.def index d6fb015c64913..8b15007c85557 100644 --- a/clang/include/clang/Basic/OpenACCClauses.def +++ b/clang/include/clang/Basic/OpenACCClauses.def @@ -41,6 +41,7 @@ VISIT_CLAUSE(Default) VISIT_CLAUSE(DefaultAsync) VISIT_CLAUSE(Delete) VISIT_CLAUSE(Detach) +VISIT_CLAUSE(Device) VISIT_CLAUSE(DeviceNum) VISIT_CLAUSE(DevicePtr) VISIT_CLAUSE(DeviceType) @@ -48,6 +49,7 @@ CLAUSE_ALIAS(DType, DeviceType, false) VISIT_CLAUSE(Finalize) VISIT_CLAUSE(FirstPrivate) VISIT_CLAUSE(Gang) +VISIT_CLAUSE(Host) VISIT_CLAUSE(If) VISIT_CLAUSE(IfPresent) VISIT_CLAUSE(Independent) diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index 0f86d46bc9802..2e5a0ea0aaac6 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -409,6 +409,8 @@ class SemaOpenACC : public SemaBase { ClauseKind == OpenACCClauseKind::Detach || ClauseKind == OpenACCClauseKind::DevicePtr || ClauseKind == OpenACCClauseKind::Reduction || + ClauseKind == OpenACCClauseKind::Host || + ClauseKind == OpenACCClauseKind::Device || (ClauseKind == OpenACCClauseKind::Self && DirKind == OpenACCDirectiveKind::Update) || ClauseKind == OpenACCClauseKind::FirstPrivate) && @@ -553,6 +555,8 @@ class SemaOpenACC : public SemaBase { ClauseKind == OpenACCClauseKind::UseDevice || ClauseKind == OpenACCClauseKind::Detach || ClauseKind == OpenACCClauseKind::DevicePtr || + ClauseKind == OpenACCClauseKind::Host || + ClauseKind == OpenACCClauseKind::Device || (ClauseKind == OpenACCClauseKind::Self && DirKind == OpenACCDirectiveKind::Update) || ClauseKind == OpenACCClauseKind::FirstPrivate) && @@ -594,6 +598,8 @@ class SemaOpenACC : public SemaBase { ClauseKind == OpenACCClauseKind::UseDevice || ClauseKind == OpenACCClauseKind::Detach || ClauseKind == OpenACCClauseKind::DevicePtr || + ClauseKind == OpenACCClauseKind::Host || + ClauseKind == OpenACCClauseKind::Device || (ClauseKind == OpenACCClauseKind::Self && DirKind == OpenACCDirectiveKind::Update) || ClauseKind == OpenACCClauseKind::FirstPrivate) && diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp index da63b471d9856..aa14ab902ba66 100644 --- a/clang/lib/AST/OpenACCClause.cpp +++ b/clang/lib/AST/OpenACCClause.cpp @@ -38,7 +38,9 @@ bool OpenACCClauseWithVarList::classof(const OpenACCClause *C) { OpenACCNoCreateClause::classof(C) || OpenACCPresentClause::classof(C) || OpenACCCopyClause::classof(C) || OpenACCCopyInClause::classof(C) || OpenACCCopyOutClause::classof(C) || - OpenACCReductionClause::classof(C) || OpenACCCreateClause::classof(C); + OpenACCReductionClause::classof(C) || + OpenACCCreateClause::classof(C) || OpenACCDeviceClause::classof(C) || + OpenACCHostClause::classof(C); } bool OpenACCClauseWithCondition::classof(const OpenACCClause *C) { return OpenACCIfClause::classof(C); @@ -406,6 +408,26 @@ OpenACCPresentClause *OpenACCPresentClause::Create(const ASTContext &C, return new (Mem) OpenACCPresentClause(BeginLoc, LParenLoc, VarList, EndLoc); } +OpenACCHostClause *OpenACCHostClause::Create(const ASTContext &C, + SourceLocation BeginLoc, + SourceLocation LParenLoc, + ArrayRef VarList, + SourceLocation EndLoc) { + void *Mem = + C.Allocate(OpenACCHostClause::totalSizeToAlloc(VarList.size())); + return new (Mem) OpenACCHostClause(BeginLoc, LParenLoc, VarList, EndLoc); +} + +OpenACCDeviceClause *OpenACCDeviceClause::Create(const ASTContext &C, + SourceLocation BeginLoc, + SourceLocation LParenLoc, + ArrayRef VarList, + SourceLocation EndLoc) { + void *Mem = + C.Allocate(OpenACCDeviceClause::totalSizeToAlloc(VarList.size())); + return new (Mem) OpenACCDeviceClause(BeginLoc, LParenLoc, VarList, EndLoc); +} + OpenACCCopyClause * OpenACCCopyClause::Create(const ASTContext &C, OpenACCClauseKind Spelling, SourceLocation BeginLoc, SourceLocation LParenLoc, @@ -711,6 +733,20 @@ void OpenACCClausePrinter::VisitPresentClause(const OpenACCPresentClause &C) { OS << ")"; } +void OpenACCClausePrinter::VisitHostClause(const OpenACCHostClause &C) { + OS << "host("; + llvm::interleaveComma(C.getVarList(), OS, + [&](const Expr *E) { printExpr(E); }); + OS << ")"; +} + +void OpenACCClausePrinter::VisitDeviceClause(const OpenACCDeviceClause &C) { + OS << "device("; + llvm::interleaveComma(C.getVarList(), OS, + [&](const Expr *E) { printExpr(E); }); + OS << ")"; +} + void OpenACCClausePrinter::VisitCopyClause(const OpenACCCopyClause &C) { OS << C.getClauseKind() << '('; llvm::interleaveComma(C.getVarList(), OS, diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index cd91a7900538b..0f1ebc68a4f76 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2554,6 +2554,15 @@ void OpenACCClauseProfiler::VisitCreateClause( VisitClauseWithVarList(Clause); } +void OpenACCClauseProfiler::VisitHostClause(const OpenACCHostClause &Clause) { + VisitClauseWithVarList(Clause); +} + +void OpenACCClauseProfiler::VisitDeviceClause( + const OpenACCDeviceClause &Clause) { + VisitClauseWithVarList(Clause); +} + void OpenACCClauseProfiler::VisitSelfClause(const OpenACCSelfClause &Clause) { if (Clause.isConditionExprClause()) { if (Clause.hasConditionExpr()) diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index eedd8faad9e85..670641242cae2 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -408,11 +408,13 @@ void TextNodeDumper::Visit(const OpenACCClause *C) { case OpenACCClauseKind::Copy: case OpenACCClauseKind::PCopy: case OpenACCClauseKind::PresentOrCopy: + case OpenACCClauseKind::Host: case OpenACCClauseKind::If: case OpenACCClauseKind::IfPresent: case OpenACCClauseKind::Independent: case OpenACCClauseKind::Detach: case OpenACCClauseKind::Delete: + case OpenACCClauseKind::Device: case OpenACCClauseKind::DeviceNum: case OpenACCClauseKind::DefaultAsync: case OpenACCClauseKind::DevicePtr: diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index c79ba97a20077..98fd61913e5a4 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -1000,15 +1000,16 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( } case OpenACCClauseKind::Self: // The 'self' clause is a var-list instead of a 'condition' in the case of - // the 'update' clause, so we have to handle it here. U se an assert to + // the 'update' clause, so we have to handle it here. Use an assert to // make sure we get the right differentiator. assert(DirKind == OpenACCDirectiveKind::Update); + [[fallthrough]]; + case OpenACCClauseKind::Device: + case OpenACCClauseKind::Host: ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind), /*IsReadOnly=*/false, /*IsZero=*/false); break; - case OpenACCClauseKind::Device: case OpenACCClauseKind::DeviceResident: - case OpenACCClauseKind::Host: case OpenACCClauseKind::Link: ParseOpenACCVarList(ClauseKind); break; diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 51a95f99f0624..a2973f1f99b2c 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -471,6 +471,22 @@ bool doesClauseApplyToDirective(OpenACCDirectiveKind DirectiveKind, return false; } } + case OpenACCClauseKind::Device: { + switch (DirectiveKind) { + case OpenACCDirectiveKind::Update: + return true; + default: + return false; + } + } + case OpenACCClauseKind::Host: { + switch (DirectiveKind) { + case OpenACCDirectiveKind::Update: + return true; + default: + return false; + } + } } default: @@ -1040,6 +1056,28 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitPresentClause( Clause.getVarList(), Clause.getEndLoc()); } +OpenACCClause *SemaOpenACCClauseVisitor::VisitHostClause( + SemaOpenACC::OpenACCParsedClause &Clause) { + // ActOnVar ensured that everything is a valid variable reference, so there + // really isn't anything to do here. GCC does some duplicate-finding, though + // it isn't apparent in the standard where this is justified. + + return OpenACCHostClause::Create(Ctx, Clause.getBeginLoc(), + Clause.getLParenLoc(), Clause.getVarList(), + Clause.getEndLoc()); +} + +OpenACCClause *SemaOpenACCClauseVisitor::VisitDeviceClause( + SemaOpenACC::OpenACCParsedClause &Clause) { + // ActOnVar ensured that everything is a valid variable reference, so there + // really isn't anything to do here. GCC does some duplicate-finding, though + // it isn't apparent in the standard where this is justified. + + return OpenACCDeviceClause::Create(Ctx, Clause.getBeginLoc(), + Clause.getLParenLoc(), Clause.getVarList(), + Clause.getEndLoc()); +} + OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyClause( SemaOpenACC::OpenACCParsedClause &Clause) { // Restrictions only properly implemented on 'compute'/'combined'/'data' diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index d00ad5a35e823..4a3c739ecbeab 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -11730,6 +11730,30 @@ void OpenACCClauseTransform::VisitPrivateClause( ParsedClause.getEndLoc()); } +template +void OpenACCClauseTransform::VisitHostClause( + const OpenACCHostClause &C) { + ParsedClause.setVarListDetails(VisitVarList(C.getVarList()), + /*IsReadOnly=*/false, /*IsZero=*/false); + + NewClause = OpenACCHostClause::Create( + Self.getSema().getASTContext(), ParsedClause.getBeginLoc(), + ParsedClause.getLParenLoc(), ParsedClause.getVarList(), + ParsedClause.getEndLoc()); +} + +template +void OpenACCClauseTransform::VisitDeviceClause( + const OpenACCDeviceClause &C) { + ParsedClause.setVarListDetails(VisitVarList(C.getVarList()), + /*IsReadOnly=*/false, /*IsZero=*/false); + + NewClause = OpenACCDeviceClause::Create( + Self.getSema().getASTContext(), ParsedClause.getBeginLoc(), + ParsedClause.getLParenLoc(), ParsedClause.getVarList(), + ParsedClause.getEndLoc()); +} + template void OpenACCClauseTransform::VisitFirstPrivateClause( const OpenACCFirstPrivateClause &C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0368990ca150d..b53f99732cacc 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -12439,6 +12439,18 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() { return OpenACCPrivateClause::Create(getContext(), BeginLoc, LParenLoc, VarList, EndLoc); } + case OpenACCClauseKind::Host: { + SourceLocation LParenLoc = readSourceLocation(); + llvm::SmallVector VarList = readOpenACCVarList(); + return OpenACCHostClause::Create(getContext(), BeginLoc, LParenLoc, VarList, + EndLoc); + } + case OpenACCClauseKind::Device: { + SourceLocation LParenLoc = readSourceLocation(); + llvm::SmallVector VarList = readOpenACCVarList(); + return OpenACCDeviceClause::Create(getContext(), BeginLoc, LParenLoc, + VarList, EndLoc); + } case OpenACCClauseKind::FirstPrivate: { SourceLocation LParenLoc = readSourceLocation(); llvm::SmallVector VarList = readOpenACCVarList(); @@ -12611,9 +12623,7 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() { } case OpenACCClauseKind::NoHost: - case OpenACCClauseKind::Device: case OpenACCClauseKind::DeviceResident: - case OpenACCClauseKind::Host: case OpenACCClauseKind::Link: case OpenACCClauseKind::Bind: case OpenACCClauseKind::Invalid: diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 8d9396e28ed50..39004fd4d4c37 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -8371,6 +8371,18 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { writeOpenACCVarList(PC); return; } + case OpenACCClauseKind::Host: { + const auto *HC = cast(C); + writeSourceLocation(HC->getLParenLoc()); + writeOpenACCVarList(HC); + return; + } + case OpenACCClauseKind::Device: { + const auto *DC = cast(C); + writeSourceLocation(DC->getLParenLoc()); + writeOpenACCVarList(DC); + return; + } case OpenACCClauseKind::FirstPrivate: { const auto *FPC = cast(C); writeSourceLocation(FPC->getLParenLoc()); @@ -8544,9 +8556,7 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { } case OpenACCClauseKind::NoHost: - case OpenACCClauseKind::Device: case OpenACCClauseKind::DeviceResident: - case OpenACCClauseKind::Host: case OpenACCClauseKind::Link: case OpenACCClauseKind::Bind: case OpenACCClauseKind::Invalid: diff --git a/clang/test/AST/ast-print-openacc-update-construct.cpp b/clang/test/AST/ast-print-openacc-update-construct.cpp index a7f5b2a42285b..0d09c829929dc 100644 --- a/clang/test/AST/ast-print-openacc-update-construct.cpp +++ b/clang/test/AST/ast-print-openacc-update-construct.cpp @@ -38,4 +38,10 @@ void uses(bool cond) { // CHECK: #pragma acc update self(I, iPtr, array, array[1], array[1:2]) #pragma acc update self(I, iPtr, array, array[1], array[1:2]) + +// CHECK: #pragma acc update host(I, iPtr, array, array[1], array[1:2]) +#pragma acc update host (I, iPtr, array, array[1], array[1:2]) + +// CHECK: #pragma acc update device(I, iPtr, array, array[1], array[1:2]) +#pragma acc update device(I, iPtr, array, array[1], array[1:2]) } diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c index 73a09697710f9..930cc1e4c0353 100644 --- a/clang/test/ParserOpenACC/parse-clauses.c +++ b/clang/test/ParserOpenACC/parse-clauses.c @@ -538,22 +538,18 @@ void VarListClauses() { #pragma acc serial link(s.array[s.value : 5], s.value), self for(int i = 0; i < 5;++i) {} - // expected-error@+2{{expected ','}} - // expected-warning@+1{{OpenACC clause 'host' not yet implemented, clause ignored}} -#pragma acc serial host(s.array[s.value] s.array[s.value :5] ), self + // expected-error@+1{{expected ','}} +#pragma acc update host(s.array[s.value] s.array[s.value :5] ) for(int i = 0; i < 5;++i) {} - // expected-warning@+1{{OpenACC clause 'host' not yet implemented, clause ignored}} -#pragma acc serial host(s.array[s.value : 5], s.value), self +#pragma acc update host(s.array[s.value : 5], s.value) for(int i = 0; i < 5;++i) {} - // expected-error@+2{{expected ','}} - // expected-warning@+1{{OpenACC clause 'device' not yet implemented, clause ignored}} -#pragma acc serial device(s.array[s.value] s.array[s.value :5] ), self + // expected-error@+1{{expected ','}} +#pragma acc update device(s.array[s.value] s.array[s.value :5] ) for(int i = 0; i < 5;++i) {} - // expected-warning@+1{{OpenACC clause 'device' not yet implemented, clause ignored}} -#pragma acc serial device(s.array[s.value : 5], s.value), self +#pragma acc update device(s.array[s.value : 5], s.value) for(int i = 0; i < 5;++i) {} // expected-error@+1{{expected ','}} diff --git a/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c b/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c index 9a98f5e1659b8..9e74ce27ffbd9 100644 --- a/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c +++ b/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c @@ -75,7 +75,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop auto detach(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop auto device(VarPtr) for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop auto deviceptr(VarPtr) @@ -85,7 +85,7 @@ void uses() { for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop auto firstprivate(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop auto host(Var) for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} @@ -192,7 +192,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop detach(Var) auto for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop device(VarPtr) auto for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop deviceptr(VarPtr) auto @@ -202,7 +202,7 @@ void uses() { for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop firstprivate(Var) auto for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop host(Var) auto for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} @@ -310,7 +310,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop independent detach(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop independent device(VarPtr) for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop independent deviceptr(VarPtr) @@ -320,7 +320,7 @@ void uses() { for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop independent firstprivate(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop independent host(Var) for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} @@ -427,7 +427,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop detach(Var) independent for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop device(VarPtr) independent for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop deviceptr(VarPtr) independent @@ -437,7 +437,7 @@ void uses() { for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop firstprivate(Var) independent for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop host(Var) independent for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} @@ -553,7 +553,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop seq detach(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop seq device(VarPtr) for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop seq deviceptr(VarPtr) @@ -563,7 +563,7 @@ void uses() { for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop seq firstprivate(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop seq host(Var) for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} @@ -676,7 +676,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop detach(Var) seq for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop device(VarPtr) seq for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop deviceptr(VarPtr) seq @@ -686,7 +686,7 @@ void uses() { for(unsigned i = 0; i < 5; ++i); #pragma acc parallel loop firstprivate(Var) seq for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop host(Var) seq for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} diff --git a/clang/test/SemaOpenACC/combined-construct-device_type-clause.c b/clang/test/SemaOpenACC/combined-construct-device_type-clause.c index 8072ae7de4ffc..c185f607548ff 100644 --- a/clang/test/SemaOpenACC/combined-construct-device_type-clause.c +++ b/clang/test/SemaOpenACC/combined-construct-device_type-clause.c @@ -100,8 +100,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'kernels loop' directive}} #pragma acc kernels loop device_type(*) detach(Var) for(int i = 0; i < 5; ++i); - // expected-error@+2{{OpenACC clause 'device' may not follow a 'device_type' clause in a 'parallel loop' construct}} - // expected-note@+1{{previous clause is here}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}} #pragma acc parallel loop device_type(*) device(VarPtr) for(int i = 0; i < 5; ++i); // expected-error@+2{{OpenACC clause 'deviceptr' may not follow a 'device_type' clause in a 'serial loop' construct}} @@ -116,8 +115,7 @@ void uses() { // expected-note@+1{{previous clause is here}} #pragma acc parallel loop device_type(*) firstprivate(Var) for(int i = 0; i < 5; ++i); - // expected-error@+2{{OpenACC clause 'host' may not follow a 'device_type' clause in a 'serial loop' construct}} - // expected-note@+1{{previous clause is here}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'serial loop' directive}} #pragma acc serial loop device_type(*) host(Var) for(int i = 0; i < 5; ++i); // expected-error@+2{{OpenACC clause 'link' may not follow a 'device_type' clause in a 'parallel loop' construct}} diff --git a/clang/test/SemaOpenACC/compute-construct-device_type-clause.c b/clang/test/SemaOpenACC/compute-construct-device_type-clause.c index 33f433ce4c519..4290fb7665685 100644 --- a/clang/test/SemaOpenACC/compute-construct-device_type-clause.c +++ b/clang/test/SemaOpenACC/compute-construct-device_type-clause.c @@ -106,8 +106,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'kernels' directive}} #pragma acc kernels device_type(*) detach(Var) while(1); - // expected-error@+2{{OpenACC clause 'device' may not follow a 'device_type' clause in a 'kernels' construct}} - // expected-note@+1{{previous clause is here}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'kernels' directive}} #pragma acc kernels device_type(*) device(VarPtr) while(1); // expected-error@+2{{OpenACC clause 'deviceptr' may not follow a 'device_type' clause in a 'kernels' construct}} @@ -122,8 +121,7 @@ void uses() { // expected-note@+1{{previous clause is here}} #pragma acc parallel device_type(*) firstprivate(Var) while(1); - // expected-error@+2{{OpenACC clause 'host' may not follow a 'device_type' clause in a 'kernels' construct}} - // expected-note@+1{{previous clause is here}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'kernels' directive}} #pragma acc kernels device_type(*) host(Var) while(1); // expected-error@+2{{OpenACC clause 'link' may not follow a 'device_type' clause in a 'kernels' construct}} diff --git a/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c b/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c index d9a4c61a81c7d..f56a1267fbad1 100644 --- a/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c +++ b/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c @@ -80,7 +80,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}} #pragma acc loop auto detach(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}} #pragma acc loop auto device(VarPtr) for(unsigned i = 0; i < 5; ++i); // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}} @@ -92,7 +92,7 @@ void uses() { // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}} #pragma acc loop auto firstprivate(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}} #pragma acc loop auto host(Var) for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} @@ -214,7 +214,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}} #pragma acc loop detach(Var) auto for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}} #pragma acc loop device(VarPtr) auto for(unsigned i = 0; i < 5; ++i); // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}} @@ -226,7 +226,7 @@ void uses() { // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}} #pragma acc loop firstprivate(Var) auto for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}} #pragma acc loop host(Var) auto for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} @@ -349,7 +349,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}} #pragma acc loop independent detach(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}} #pragma acc loop independent device(VarPtr) for(unsigned i = 0; i < 5; ++i); // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}} @@ -361,7 +361,7 @@ void uses() { // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}} #pragma acc loop independent firstprivate(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}} #pragma acc loop independent host(Var) for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} @@ -483,7 +483,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}} #pragma acc loop detach(Var) independent for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}} #pragma acc loop device(VarPtr) independent for(unsigned i = 0; i < 5; ++i); // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}} @@ -495,7 +495,7 @@ void uses() { // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}} #pragma acc loop firstprivate(Var) independent for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}} #pragma acc loop host(Var) independent for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} @@ -626,7 +626,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}} #pragma acc loop seq detach(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}} #pragma acc loop seq device(VarPtr) for(unsigned i = 0; i < 5; ++i); // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}} @@ -638,7 +638,7 @@ void uses() { // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}} #pragma acc loop seq firstprivate(Var) for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}} #pragma acc loop seq host(Var) for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} @@ -766,7 +766,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}} #pragma acc loop detach(Var) seq for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}} #pragma acc loop device(VarPtr) seq for(unsigned i = 0; i < 5; ++i); // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}} @@ -778,7 +778,7 @@ void uses() { // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}} #pragma acc loop firstprivate(Var) seq for(unsigned i = 0; i < 5; ++i); - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}} #pragma acc loop host(Var) seq for(unsigned i = 0; i < 5; ++i); // expected-warning@+1{{OpenACC clause 'link' not yet implemented}} diff --git a/clang/test/SemaOpenACC/loop-construct-device_type-clause.c b/clang/test/SemaOpenACC/loop-construct-device_type-clause.c index f16f17f5d6810..2c1189bc647d0 100644 --- a/clang/test/SemaOpenACC/loop-construct-device_type-clause.c +++ b/clang/test/SemaOpenACC/loop-construct-device_type-clause.c @@ -92,8 +92,7 @@ void uses() { // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}} #pragma acc loop device_type(*) detach(Var) for(int i = 0; i < 5; ++i); - // expected-error@+2{{OpenACC clause 'device' may not follow a 'device_type' clause in a 'loop' construct}} - // expected-note@+1{{previous clause is here}} + // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}} #pragma acc loop device_type(*) device(VarPtr) for(int i = 0; i < 5; ++i); // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}} @@ -106,8 +105,7 @@ void uses() { // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}} #pragma acc loop device_type(*) firstprivate(Var) for(int i = 0; i < 5; ++i); - // expected-error@+2{{OpenACC clause 'host' may not follow a 'device_type' clause in a 'loop' construct}} - // expected-note@+1{{previous clause is here}} + // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}} #pragma acc loop device_type(*) host(Var) for(int i = 0; i < 5; ++i); // expected-error@+2{{OpenACC clause 'link' may not follow a 'device_type' clause in a 'loop' construct}} diff --git a/clang/test/SemaOpenACC/update-construct-ast.cpp b/clang/test/SemaOpenACC/update-construct-ast.cpp index 9048e8823f5f5..5582bde7a64c2 100644 --- a/clang/test/SemaOpenACC/update-construct-ast.cpp +++ b/clang/test/SemaOpenACC/update-construct-ast.cpp @@ -89,6 +89,33 @@ void NormalFunc() { // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' // CHECK-NEXT: IntegerLiteral{{.*}} 0 // CHECK-NEXT: IntegerLiteral{{.*}} 1 + +#pragma acc update host(Global, GlobalArray, GlobalArray[0], GlobalArray[0:1]) device(Global, GlobalArray, GlobalArray[0], GlobalArray[0:1]) + // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: host clause + // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int' + // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' + // CHECK-NEXT: ArraySubscriptExpr{{.*}} 'short' lvalue + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' + // CHECK-NEXT: IntegerLiteral{{.*}} 0 + // CHECK-NEXT: ArraySectionExpr + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' + // CHECK-NEXT: IntegerLiteral{{.*}} 0 + // CHECK-NEXT: IntegerLiteral{{.*}} 1 + // CHECK-NEXT: device clause + // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int' + // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' + // CHECK-NEXT: ArraySubscriptExpr{{.*}} 'short' lvalue + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' + // CHECK-NEXT: IntegerLiteral{{.*}} 0 + // CHECK-NEXT: ArraySectionExpr + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' + // CHECK-NEXT: IntegerLiteral{{.*}} 0 + // CHECK-NEXT: IntegerLiteral{{.*}} 1 } template @@ -163,6 +190,29 @@ void TemplFunc(T t) { // CHECK-NEXT: IntegerLiteral{{.*}}0 // CHECK-NEXT: IntegerLiteral{{.*}}1 +#pragma acc update host(Local, LocalArray, LocalArray[0], LocalArray[0:1]) device(Local, LocalArray, LocalArray[0], LocalArray[0:1]) + // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: host clause + // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(T::value)' + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' + // CHECK-NEXT: ArraySubscriptExpr + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' + // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: ArraySectionExpr + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' + // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: IntegerLiteral{{.*}}1 + // CHECK-NEXT: device clause + // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(T::value)' + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' + // CHECK-NEXT: ArraySubscriptExpr + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' + // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: ArraySectionExpr + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' + // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: IntegerLiteral{{.*}}1 + // Instantiation: // CHECK-NEXT: FunctionDecl{{.*}} TemplFunc 'void (SomeStruct)' implicit_instantiation // CHECK-NEXT: TemplateArgument type 'SomeStruct' @@ -255,6 +305,32 @@ void TemplFunc(T t) { // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' // CHECK-NEXT: IntegerLiteral{{.*}}0 // CHECK-NEXT: IntegerLiteral{{.*}}1 + + // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: host clause + // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(SomeStruct::value)':'const unsigned int' + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' + // CHECK-NEXT: ArraySubscriptExpr + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' + // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: ArraySectionExpr + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' + // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: IntegerLiteral{{.*}}1 + // CHECK-NEXT: device clause + // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(SomeStruct::value)':'const unsigned int' + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' + // CHECK-NEXT: ArraySubscriptExpr + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' + // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: ArraySectionExpr + // CHECK-NEXT: ImplicitCastExpr + // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' + // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: IntegerLiteral{{.*}}1 } struct SomeStruct{ diff --git a/clang/test/SemaOpenACC/update-construct.cpp b/clang/test/SemaOpenACC/update-construct.cpp index 2abd7a30eda88..8b65da69e42b9 100644 --- a/clang/test/SemaOpenACC/update-construct.cpp +++ b/clang/test/SemaOpenACC/update-construct.cpp @@ -10,9 +10,7 @@ void uses() { #pragma acc update if(true) self(Var) #pragma acc update if_present self(Var) #pragma acc update self(Var) - // expected-warning@+1{{OpenACC clause 'host' not yet implemented}} #pragma acc update host(Var) - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} #pragma acc update device(Var) // expected-error@+2{{OpenACC clause 'if' may not follow a 'device_type' clause in a 'update' construct}} @@ -56,35 +54,29 @@ void uses() { // Cannot be the body of an 'if', 'while', 'do', 'switch', or // 'label'. - // expected-error@+3{{OpenACC 'update' construct may not appear in place of the statement following an if statement}} + // expected-error@+2{{OpenACC 'update' construct may not appear in place of the statement following an if statement}} if (true) - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} #pragma acc update device(Var) - // expected-error@+3{{OpenACC 'update' construct may not appear in place of the statement following a while statement}} + // expected-error@+2{{OpenACC 'update' construct may not appear in place of the statement following a while statement}} while (true) - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} #pragma acc update device(Var) - // expected-error@+3{{OpenACC 'update' construct may not appear in place of the statement following a do statement}} + // expected-error@+2{{OpenACC 'update' construct may not appear in place of the statement following a do statement}} do - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} #pragma acc update device(Var) while (true); - // expected-error@+3{{OpenACC 'update' construct may not appear in place of the statement following a switch statement}} + // expected-error@+2{{OpenACC 'update' construct may not appear in place of the statement following a switch statement}} switch(Var) - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} #pragma acc update device(Var) - // expected-error@+3{{OpenACC 'update' construct may not appear in place of the statement following a label statement}} + // expected-error@+2{{OpenACC 'update' construct may not appear in place of the statement following a label statement}} LABEL: - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} #pragma acc update device(Var) // For loops are OK. for (;;) - // expected-warning@+1{{OpenACC clause 'device' not yet implemented}} #pragma acc update device(Var) // Checking for 'async', which requires an 'int' expression. @@ -132,11 +124,19 @@ void varlist_restrictions_templ() { // Members of a subarray of struct or class type may not appear, but others // are permitted to. #pragma acc update self(iArray[0:1]) +#pragma acc update host(iArray[0:1]) +#pragma acc update device(iArray[0:1]) #pragma acc update self(Array[0:1]) +#pragma acc update host(Array[0:1]) +#pragma acc update device(Array[0:1]) // expected-error@+1{{OpenACC sub-array is not allowed here}} #pragma acc update self(Array[0:1].MemberOfComp) + // expected-error@+1{{OpenACC sub-array is not allowed here}} +#pragma acc update host(Array[0:1].MemberOfComp) + // expected-error@+1{{OpenACC sub-array is not allowed here}} +#pragma acc update device(Array[0:1].MemberOfComp) } void varlist_restrictions() { @@ -149,19 +149,33 @@ void varlist_restrictions() { int *LocalPtr; #pragma acc update self(LocalInt, LocalPtr, Single) +#pragma acc update host(LocalInt, LocalPtr, Single) +#pragma acc update device(LocalInt, LocalPtr, Single) #pragma acc update self(Single.MemberOfComp) +#pragma acc update host(Single.MemberOfComp) +#pragma acc update device(Single.MemberOfComp) #pragma acc update self(Single.Array[0:1]) +#pragma acc update host(Single.Array[0:1]) +#pragma acc update device(Single.Array[0:1]) // Members of a subarray of struct or class type may not appear, but others // are permitted to. #pragma acc update self(iArray[0:1]) +#pragma acc update host(iArray[0:1]) +#pragma acc update device(iArray[0:1]) #pragma acc update self(Array[0:1]) +#pragma acc update host(Array[0:1]) +#pragma acc update device(Array[0:1]) // expected-error@+1{{OpenACC sub-array is not allowed here}} #pragma acc update self(Array[0:1].MemberOfComp) + // expected-error@+1{{OpenACC sub-array is not allowed here}} +#pragma acc update host(Array[0:1].MemberOfComp) + // expected-error@+1{{OpenACC sub-array is not allowed here}} +#pragma acc update device(Array[0:1].MemberOfComp) } diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 5e51fc4e2f66c..e175aab4499ff 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2877,6 +2877,14 @@ void OpenACCClauseEnqueue::VisitPrivateClause(const OpenACCPrivateClause &C) { VisitVarList(C); } +void OpenACCClauseEnqueue::VisitHostClause(const OpenACCHostClause &C) { + VisitVarList(C); +} + +void OpenACCClauseEnqueue::VisitDeviceClause(const OpenACCDeviceClause &C) { + VisitVarList(C); +} + void OpenACCClauseEnqueue::VisitFirstPrivateClause( const OpenACCFirstPrivateClause &C) { VisitVarList(C); From 553fa204ed5ab4c48bc6080451df24310c00e69c Mon Sep 17 00:00:00 2001 From: erichkeane Date: Thu, 9 Jan 2025 08:59:31 -0800 Subject: [PATCH 15/79] [OpenACC] Implement 'at least one of' restriction for 'update' This completes the implementation of 'update' by implementing its last restriction. This restriction requires at least 1 of the 'self', 'host', or 'device' clauses. --- clang/lib/Sema/SemaOpenACC.cpp | 13 ++- .../ast-print-openacc-update-construct.cpp | 48 ++++---- clang/test/ParserOpenACC/parse-clauses.c | 2 +- clang/test/ParserOpenACC/parse-constructs.c | 1 + .../test/SemaOpenACC/update-construct-ast.cpp | 104 +++++++++++------- clang/test/SemaOpenACC/update-construct.cpp | 48 ++++---- 6 files changed, 129 insertions(+), 87 deletions(-) diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index a2973f1f99b2c..9ded913638fb3 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -3732,8 +3732,17 @@ bool SemaOpenACC::ActOnStartStmtDirective( OpenACCClauseKind::DeviceType, OpenACCClauseKind::If}); - // TODO: OpenACC: 'Update' construct needs to have one of 'self', 'host', or - // 'device'. Implement here. + // OpenACC3.3 2.14.4: At least one self, host, or device clause must appear on + // an update directive. + if (K == OpenACCDirectiveKind::Update && + llvm::find_if(Clauses, llvm::IsaPred) == + Clauses.end()) + return Diag(StartLoc, diag::err_acc_construct_one_clause_of) + << K + << GetListOfClauses({OpenACCClauseKind::Self, + OpenACCClauseKind::Host, + OpenACCClauseKind::Device}); return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/true); } diff --git a/clang/test/AST/ast-print-openacc-update-construct.cpp b/clang/test/AST/ast-print-openacc-update-construct.cpp index 0d09c829929dc..f999e1037e904 100644 --- a/clang/test/AST/ast-print-openacc-update-construct.cpp +++ b/clang/test/AST/ast-print-openacc-update-construct.cpp @@ -3,38 +3,38 @@ void uses(bool cond) { int I; int *iPtr; int array[5]; - // CHECK: #pragma acc update -#pragma acc update + // CHECK: #pragma acc update self(I) +#pragma acc update self(I) -// CHECK: #pragma acc update if_present -#pragma acc update if_present -// CHECK: #pragma acc update if(cond) -#pragma acc update if(cond) +// CHECK: #pragma acc update self(I) if_present +#pragma acc update self(I) if_present +// CHECK: #pragma acc update self(I) if(cond) +#pragma acc update self(I) if(cond) -// CHECK: #pragma acc update async -#pragma acc update async -// CHECK: #pragma acc update async(*iPtr) -#pragma acc update async(*iPtr) -// CHECK: #pragma acc update async(I) -#pragma acc update async(I) +// CHECK: #pragma acc update self(I) async +#pragma acc update self(I) async +// CHECK: #pragma acc update self(I) async(*iPtr) +#pragma acc update self(I) async(*iPtr) +// CHECK: #pragma acc update self(I) async(I) +#pragma acc update self(I) async(I) -// CHECK: #pragma acc update wait(*iPtr, I) async -#pragma acc update wait(*iPtr, I) async +// CHECK: #pragma acc update self(I) wait(*iPtr, I) async +#pragma acc update self(I) wait(*iPtr, I) async -// CHECK: #pragma acc update wait(queues: *iPtr, I) async(*iPtr) -#pragma acc update wait(queues:*iPtr, I) async(*iPtr) +// CHECK: #pragma acc update self(I) wait(queues: *iPtr, I) async(*iPtr) +#pragma acc update self(I) wait(queues:*iPtr, I) async(*iPtr) -// CHECK: #pragma acc update wait(devnum: I : *iPtr, I) async(I) -#pragma acc update wait(devnum:I:*iPtr, I) async(I) +// CHECK: #pragma acc update self(I) wait(devnum: I : *iPtr, I) async(I) +#pragma acc update self(I) wait(devnum:I:*iPtr, I) async(I) -// CHECK: #pragma acc update wait(devnum: I : queues: *iPtr, I) if(I == array[I]) async(I) -#pragma acc update wait(devnum:I:queues:*iPtr, I) if(I == array[I]) async(I) +// CHECK: #pragma acc update self(I) wait(devnum: I : queues: *iPtr, I) if(I == array[I]) async(I) +#pragma acc update self(I) wait(devnum:I:queues:*iPtr, I) if(I == array[I]) async(I) -// CHECK: #pragma acc update device_type(I) dtype(H) -#pragma acc update device_type(I) dtype(H) +// CHECK: #pragma acc update self(I) device_type(I) dtype(H) +#pragma acc update self(I) device_type(I) dtype(H) -// CHECK: #pragma acc update device_type(J) dtype(K) -#pragma acc update device_type(J) dtype(K) +// CHECK: #pragma acc update self(I) device_type(J) dtype(K) +#pragma acc update self(I) device_type(J) dtype(K) // CHECK: #pragma acc update self(I, iPtr, array, array[1], array[1:2]) #pragma acc update self(I, iPtr, array, array[1], array[1:2]) diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c index 930cc1e4c0353..b871624b6e943 100644 --- a/clang/test/ParserOpenACC/parse-clauses.c +++ b/clang/test/ParserOpenACC/parse-clauses.c @@ -344,7 +344,7 @@ void SelfUpdate() { struct Members s; // expected-error@+1{{expected '('}} -#pragma acc update self +#pragma acc update host(s) self for(int i = 0; i < 5;++i) {} // expected-error@+3{{use of undeclared identifier 'zero'}} diff --git a/clang/test/ParserOpenACC/parse-constructs.c b/clang/test/ParserOpenACC/parse-constructs.c index 9948e33ac94d1..886a912713c58 100644 --- a/clang/test/ParserOpenACC/parse-constructs.c +++ b/clang/test/ParserOpenACC/parse-constructs.c @@ -151,6 +151,7 @@ void func() { // expected-error@+1{{OpenACC 'set' construct must have at least one 'default_async', 'device_num', 'device_type' or 'if' clause}} #pragma acc set clause list for(;;){} + // expected-error@+2{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}} // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc update clause list for(;;){} diff --git a/clang/test/SemaOpenACC/update-construct-ast.cpp b/clang/test/SemaOpenACC/update-construct-ast.cpp index 5582bde7a64c2..4238f93d90a34 100644 --- a/clang/test/SemaOpenACC/update-construct-ast.cpp +++ b/clang/test/SemaOpenACC/update-construct-ast.cpp @@ -18,8 +18,10 @@ void NormalFunc() { // CHECK-LABEL: NormalFunc // CHECK-NEXT: CompoundStmt -#pragma acc update if_present if (some_int() < some_long()) +#pragma acc update self(Global) if_present if (some_int() < some_long()) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int' // CHECK-NEXT: if_present clause // CHECK-NEXT: if clause // CHECK-NEXT: BinaryOperator{{.*}}'bool' '<' @@ -31,15 +33,19 @@ void NormalFunc() { // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' 'long ()' -#pragma acc update wait async device_type(A) dtype(B) +#pragma acc update self(Global) wait async device_type(A) dtype(B) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int' // CHECK-NEXT: wait clause // CHECK-NEXT: <<>> // CHECK-NEXT: async clause // CHECK-NEXT: device_type(A) // CHECK-NEXT: dtype(B) -#pragma acc update wait(some_int(), some_long()) async(some_int()) +#pragma acc update self(Global) wait(some_int(), some_long()) async(some_int()) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int' // CHECK-NEXT: wait clause // CHECK-NEXT: <<>> // CHECK-NEXT: CallExpr{{.*}}'int' @@ -52,8 +58,10 @@ void NormalFunc() { // CHECK-NEXT: CallExpr{{.*}}'int' // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' 'int ()' -#pragma acc update wait(queues:some_int(), some_long()) +#pragma acc update self(Global) wait(queues:some_int(), some_long()) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int' // CHECK-NEXT: wait clause // CHECK-NEXT: <<>> // CHECK-NEXT: CallExpr{{.*}}'int' @@ -62,8 +70,10 @@ void NormalFunc() { // CHECK-NEXT: CallExpr{{.*}}'long' // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' 'long ()' -#pragma acc update wait(devnum: some_int() :some_int(), some_long()) +#pragma acc update self(Global) wait(devnum: some_int() :some_int(), some_long()) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int' // CHECK-NEXT: wait clause // CHECK-NEXT: CallExpr{{.*}}'int' // CHECK-NEXT: ImplicitCastExpr @@ -83,12 +93,12 @@ void NormalFunc() { // CHECK-NEXT: ArraySubscriptExpr{{.*}} 'short' lvalue // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' - // CHECK-NEXT: IntegerLiteral{{.*}} 0 + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0 // CHECK-NEXT: ArraySectionExpr // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' - // CHECK-NEXT: IntegerLiteral{{.*}} 0 - // CHECK-NEXT: IntegerLiteral{{.*}} 1 + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0 + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 1 #pragma acc update host(Global, GlobalArray, GlobalArray[0], GlobalArray[0:1]) device(Global, GlobalArray, GlobalArray[0], GlobalArray[0:1]) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update @@ -98,24 +108,24 @@ void NormalFunc() { // CHECK-NEXT: ArraySubscriptExpr{{.*}} 'short' lvalue // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' - // CHECK-NEXT: IntegerLiteral{{.*}} 0 + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0 // CHECK-NEXT: ArraySectionExpr // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' - // CHECK-NEXT: IntegerLiteral{{.*}} 0 - // CHECK-NEXT: IntegerLiteral{{.*}} 1 + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0 + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 1 // CHECK-NEXT: device clause // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int' // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' // CHECK-NEXT: ArraySubscriptExpr{{.*}} 'short' lvalue // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' - // CHECK-NEXT: IntegerLiteral{{.*}} 0 + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0 // CHECK-NEXT: ArraySectionExpr // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]' - // CHECK-NEXT: IntegerLiteral{{.*}} 0 - // CHECK-NEXT: IntegerLiteral{{.*}} 1 + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0 + // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 1 } template @@ -126,8 +136,10 @@ void TemplFunc(T t) { // CHECK-NEXT: ParmVarDecl{{.*}} t 'T' // CHECK-NEXT: CompoundStmt -#pragma acc update if_present if (T::value < t) +#pragma acc update self(t) if_present if (T::value < t) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T' // CHECK-NEXT: if_present clause // CHECK-NEXT: if clause // CHECK-NEXT: BinaryOperator{{.*}}'' '<' @@ -135,15 +147,19 @@ void TemplFunc(T t) { // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T' // CHECK-NEXT: DeclRefExpr{{.*}}'t' 'T' -#pragma acc update wait async device_type(T) dtype(U) +#pragma acc update self(t) wait async device_type(T) dtype(U) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T' // CHECK-NEXT: wait clause // CHECK-NEXT: <<>> // CHECK-NEXT: async clause // CHECK-NEXT: device_type(T) // CHECK-NEXT: dtype(U) -#pragma acc update wait(T::value, t) async(T::value) +#pragma acc update self(t) wait(T::value, t) async(T::value) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T' // CHECK-NEXT: wait clause // CHECK-NEXT: <<>> // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}}'' @@ -152,8 +168,10 @@ void TemplFunc(T t) { // CHECK-NEXT: async clause // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}}'' // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T' -#pragma acc update wait(queues:T::value, t) async(t) +#pragma acc update self(t) wait(queues:T::value, t) async(t) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T' // CHECK-NEXT: wait clause // CHECK-NEXT: <<>> // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}}'' @@ -161,8 +179,10 @@ void TemplFunc(T t) { // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T' // CHECK-NEXT: async clause // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T' -#pragma acc update wait(devnum: T::value:t, T::value) +#pragma acc update self(t) wait(devnum: T::value:t, T::value) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T' // CHECK-NEXT: wait clause // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}}'' // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T' @@ -184,11 +204,11 @@ void TemplFunc(T t) { // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' // CHECK-NEXT: ArraySubscriptExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 // CHECK-NEXT: ArraySectionExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 - // CHECK-NEXT: IntegerLiteral{{.*}}1 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1 #pragma acc update host(Local, LocalArray, LocalArray[0], LocalArray[0:1]) device(Local, LocalArray, LocalArray[0], LocalArray[0:1]) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update @@ -197,21 +217,21 @@ void TemplFunc(T t) { // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' // CHECK-NEXT: ArraySubscriptExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 // CHECK-NEXT: ArraySectionExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 - // CHECK-NEXT: IntegerLiteral{{.*}}1 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1 // CHECK-NEXT: device clause // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(T::value)' // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' // CHECK-NEXT: ArraySubscriptExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 // CHECK-NEXT: ArraySectionExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 - // CHECK-NEXT: IntegerLiteral{{.*}}1 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1 // Instantiation: // CHECK-NEXT: FunctionDecl{{.*}} TemplFunc 'void (SomeStruct)' implicit_instantiation @@ -222,6 +242,8 @@ void TemplFunc(T t) { // CHECK-NEXT: CompoundStmt // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'SomeStruct' // CHECK-NEXT: if_present clause // CHECK-NEXT: if clause // CHECK-NEXT: BinaryOperator{{.*}}'bool' '<' @@ -234,6 +256,8 @@ void TemplFunc(T t) { // CHECK-NEXT: DeclRefExpr{{.*}}'t' 'SomeStruct' // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'SomeStruct' // CHECK-NEXT: wait clause // CHECK-NEXT: <<>> // CHECK-NEXT: async clause @@ -241,6 +265,8 @@ void TemplFunc(T t) { // CHECK-NEXT: dtype(U) // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'SomeStruct' // CHECK-NEXT: wait clause // CHECK-NEXT: <<>> // CHECK-NEXT: ImplicitCastExpr{{.*}}'unsigned int' @@ -256,6 +282,8 @@ void TemplFunc(T t) { // CHECK-NEXT: NestedNameSpecifier TypeSpec 'SomeStruct' // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'SomeStruct' // CHECK-NEXT: wait clause // CHECK-NEXT: <<>> // CHECK-NEXT: ImplicitCastExpr{{.*}}'unsigned int' @@ -272,6 +300,8 @@ void TemplFunc(T t) { // CHECK-NEXT: DeclRefExpr{{.*}}'t' 'SomeStruct' // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update + // CHECK-NEXT: self clause + // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'SomeStruct' // CHECK-NEXT: wait clause // CHECK-NEXT: ImplicitCastExpr{{.*}}'unsigned int' // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const unsigned int' @@ -299,12 +329,12 @@ void TemplFunc(T t) { // CHECK-NEXT: ArraySubscriptExpr // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 // CHECK-NEXT: ArraySectionExpr // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 - // CHECK-NEXT: IntegerLiteral{{.*}}1 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1 // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update // CHECK-NEXT: host clause @@ -313,24 +343,24 @@ void TemplFunc(T t) { // CHECK-NEXT: ArraySubscriptExpr // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 // CHECK-NEXT: ArraySectionExpr // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 - // CHECK-NEXT: IntegerLiteral{{.*}}1 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1 // CHECK-NEXT: device clause // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(SomeStruct::value)':'const unsigned int' // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' // CHECK-NEXT: ArraySubscriptExpr // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 // CHECK-NEXT: ArraySectionExpr // CHECK-NEXT: ImplicitCastExpr // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]' - // CHECK-NEXT: IntegerLiteral{{.*}}0 - // CHECK-NEXT: IntegerLiteral{{.*}}1 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0 + // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1 } struct SomeStruct{ diff --git a/clang/test/SemaOpenACC/update-construct.cpp b/clang/test/SemaOpenACC/update-construct.cpp index 8b65da69e42b9..30c079c8befd4 100644 --- a/clang/test/SemaOpenACC/update-construct.cpp +++ b/clang/test/SemaOpenACC/update-construct.cpp @@ -21,36 +21,38 @@ void uses() { #pragma acc update self(Var) device_type(I) if_present // expected-error@+2{{OpenACC clause 'self' may not follow a 'device_type' clause in a 'update' construct}} // expected-note@+1{{previous clause is here}} -#pragma acc update device_type(I) self(Var) +#pragma acc update self(Var) device_type(I) self(Var) // expected-error@+2{{OpenACC clause 'host' may not follow a 'device_type' clause in a 'update' construct}} // expected-note@+1{{previous clause is here}} -#pragma acc update device_type(I) host(Var) +#pragma acc update self(Var) device_type(I) host(Var) // expected-error@+2{{OpenACC clause 'device' may not follow a 'device_type' clause in a 'update' construct}} // expected-note@+1{{previous clause is here}} -#pragma acc update device_type(I) device(Var) +#pragma acc update self(Var) device_type(I) device(Var) // These 2 are OK. #pragma acc update self(Var) device_type(I) async #pragma acc update self(Var) device_type(I) wait // Unless otherwise specified, we assume 'device_type' can happen after itself. #pragma acc update self(Var) device_type(I) device_type(I) - // TODO: OpenACC: These should diagnose because there isn't at least 1 of - // 'self', 'host', or 'device'. + // These diagnose because there isn't at least 1 of 'self', 'host', or + // 'device'. + // expected-error@+1{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}} #pragma acc update async + // expected-error@+1{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}} #pragma acc update wait + // expected-error@+1{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}} #pragma acc update device_type(I) + // expected-error@+1{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}} #pragma acc update if(true) + // expected-error@+1{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}} #pragma acc update if_present // expected-error@+1{{value of type 'struct NotConvertible' is not contextually convertible to 'bool'}} -#pragma acc update if (NC) device_type(I) +#pragma acc update self(Var) if (NC) device_type(I) // expected-error@+2{{OpenACC 'if' clause cannot appear more than once on a 'update' directive}} // expected-note@+1{{previous clause is here}} -#pragma acc update if(true) if (false) - - // TODO: OpenACC: There is restrictions on the contents of a 'varlist', so - // those should be checked here too. +#pragma acc update self(Var) if(true) if (false) // Cannot be the body of an 'if', 'while', 'do', 'switch', or // 'label'. @@ -80,34 +82,34 @@ void uses() { #pragma acc update device(Var) // Checking for 'async', which requires an 'int' expression. -#pragma acc update async +#pragma acc update self(Var) async -#pragma acc update async(getI()) +#pragma acc update self(Var) async(getI()) // expected-error@+2{{expected ')'}} // expected-note@+1{{to match this '('}} -#pragma acc update async(getI(), getI()) +#pragma acc update self(Var) async(getI(), getI()) // expected-error@+2{{OpenACC 'async' clause cannot appear more than once on a 'update' directive}} // expected-note@+1{{previous clause is here}} -#pragma acc update async(getI()) async(getI()) +#pragma acc update self(Var) async(getI()) async(getI()) // expected-error@+1{{OpenACC clause 'async' requires expression of integer type ('struct NotConvertible' invalid)}} -#pragma acc update async(NC) +#pragma acc update self(Var) async(NC) // Checking for 'wait', which has a complicated set arguments. -#pragma acc update wait -#pragma acc update wait() -#pragma acc update wait(getI(), getI()) -#pragma acc update wait(devnum: getI(): getI()) -#pragma acc update wait(devnum: getI(): queues: getI(), getI()) +#pragma acc update self(Var) wait +#pragma acc update self(Var) wait() +#pragma acc update self(Var) wait(getI(), getI()) +#pragma acc update self(Var) wait(devnum: getI(): getI()) +#pragma acc update self(Var) wait(devnum: getI(): queues: getI(), getI()) // expected-error@+1{{OpenACC clause 'wait' requires expression of integer type ('struct NotConvertible' invalid)}} -#pragma acc update wait(devnum:NC : 5) +#pragma acc update self(Var) wait(devnum:NC : 5) // expected-error@+1{{OpenACC clause 'wait' requires expression of integer type ('struct NotConvertible' invalid)}} -#pragma acc update wait(devnum:5 : NC) +#pragma acc update self(Var) wait(devnum:5 : NC) int arr[5]; // expected-error@+3{{OpenACC clause 'wait' requires expression of integer type ('int[5]' invalid)}} // expected-error@+2{{OpenACC clause 'wait' requires expression of integer type ('int[5]' invalid)}} // expected-error@+1{{OpenACC clause 'wait' requires expression of integer type ('struct NotConvertible' invalid)}} -#pragma acc update wait(devnum:arr : queues: arr, NC, 5) +#pragma acc update self(Var) wait(devnum:arr : queues: arr, NC, 5) } struct SomeS { From 8ac6a6b81629840d2456e32ec4651440523718dc Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 9 Jan 2025 18:33:42 +0100 Subject: [PATCH 16/79] Reorder fields so InitDeferredFlag is destroyed last TimerGroup's dtor accesses this field. once_flag has a trivial destructor so it doesn't really matter, but msan checks that it's not accessed after destruction. --- llvm/lib/Support/Timer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp index ccd78ad86e91c..3f0926ae0f3cb 100644 --- a/llvm/lib/Support/Timer.cpp +++ b/llvm/lib/Support/Timer.cpp @@ -507,11 +507,11 @@ class llvm::TimerGlobals { // Order of these members and initialization below is important. For example // the DefaultTimerGroup uses the TimerLock. Most of these also depend on the // options above. + std::once_flag InitDeferredFlag; std::unique_ptr SignpostsPtr; std::unique_ptr> TimerLockPtr; std::unique_ptr DefaultTimerGroupPtr; std::unique_ptr NamedGroupedTimersPtr; - std::once_flag InitDeferredFlag; TimerGlobals &initDeferred() { std::call_once(InitDeferredFlag, [this]() { SignpostsPtr = std::make_unique(); From 43e663d0fd82e226203b051d31fd3ce5388ca984 Mon Sep 17 00:00:00 2001 From: fahadnayyar <30953967+fahadnayyar@users.noreply.github.com> Date: Thu, 9 Jan 2025 09:42:24 -0800 Subject: [PATCH 17/79] [APINotes] [NFC] Add tests for SWIFT_RETURNS_(UN)RETAINED for ObjC APIs (#122167) Adding test case to verify that SwiftReturnOwnership works correctly for ObjC functions and methods as well. rdar://142504115 --- .../SwiftReturnOwnershipForObjC.apinotes | 17 +++++++++++++++++ .../Headers/SwiftReturnOwnershipForObjC.h | 9 +++++++++ .../APINotes/Inputs/Headers/module.modulemap | 4 ++++ clang/test/APINotes/swift-return-ownership.m | 11 +++++++++++ 4 files changed, 41 insertions(+) create mode 100644 clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.apinotes create mode 100644 clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.h create mode 100644 clang/test/APINotes/swift-return-ownership.m diff --git a/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.apinotes b/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.apinotes new file mode 100644 index 0000000000000..4b749b40e026f --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.apinotes @@ -0,0 +1,17 @@ +--- +Name: SwiftImportAsForObjC +Classes: + - Name: MethodTest + Methods: + - Selector: getUnowned + MethodKind: Instance + SwiftReturnOwnership: unretained + - Selector: getOwned + MethodKind: Instance + SwiftReturnOwnership: retained + +Functions: + - Name: getObjCUnowned + SwiftReturnOwnership: unretained + - Name: getObjCOwned + SwiftReturnOwnership: retained diff --git a/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.h b/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.h new file mode 100644 index 0000000000000..83e2535387caa --- /dev/null +++ b/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.h @@ -0,0 +1,9 @@ +struct RefCountedType { int value; }; + +@interface MethodTest +- (struct RefCountedType *)getUnowned; +- (struct RefCountedType *)getOwned; +@end + +struct RefCountedType * getObjCUnowned(void); +struct RefCountedType * getObjCOwned(void); diff --git a/clang/test/APINotes/Inputs/Headers/module.modulemap b/clang/test/APINotes/Inputs/Headers/module.modulemap index 31f7d36356d83..bedb7d505f794 100644 --- a/clang/test/APINotes/Inputs/Headers/module.modulemap +++ b/clang/test/APINotes/Inputs/Headers/module.modulemap @@ -57,3 +57,7 @@ module Templates { module SwiftImportAs { header "SwiftImportAs.h" } + +module SwiftReturnOwnershipForObjC { + header "SwiftReturnOwnershipForObjC.h" +} diff --git a/clang/test/APINotes/swift-return-ownership.m b/clang/test/APINotes/swift-return-ownership.m new file mode 100644 index 0000000000000..b1221d159e610 --- /dev/null +++ b/clang/test/APINotes/swift-return-ownership.m @@ -0,0 +1,11 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s +// RUN: %clang_cc1 -ast-print %t/ModulesCache/SwiftReturnOwnershipForObjC.pcm | FileCheck %s +#import + +// CHECK: @interface MethodTest +// CHECK: - (struct RefCountedType *)getUnowned __attribute__((swift_attr("returns_unretained"))); +// CHECK: - (struct RefCountedType *)getOwned __attribute__((swift_attr("returns_retained"))); +// CHECK: @end +// CHECK: __attribute__((swift_attr("returns_unretained"))) struct RefCountedType *getObjCUnowned(void); +// CHECK: __attribute__((swift_attr("returns_retained"))) struct RefCountedType *getObjCOwned(void); From 50ca2eff5fb2c239a373790bcbe384f629eb077a Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 9 Jan 2025 09:56:44 -0800 Subject: [PATCH 18/79] Drop emitQuaternaryBuiltin from clang (#122169) It was superceeded by the emitBuiltinWithOneOverloadedType() some time ago. --- clang/lib/CodeGen/CGBuiltin.cpp | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 573be932f8b1a..ca03fb665d423 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -774,18 +774,6 @@ static Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, Args, Name); } -// Emit an intrinsic that has 4 operands of the same type as its result. -static Value *emitQuaternaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); - llvm::Value *Src3 = CGF.EmitScalarExpr(E->getArg(3)); - - Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, {Src0, Src1, Src2, Src3}); -} - // Emit an intrinsic that has 1 float or double operand, and 1 integer. static Value *emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, @@ -20443,7 +20431,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_bitop3_b32: case AMDGPU::BI__builtin_amdgcn_bitop3_b16: - return emitQuaternaryBuiltin(*this, E, Intrinsic::amdgcn_bitop3); + return emitBuiltinWithOneOverloadedType<4>(*this, E, + Intrinsic::amdgcn_bitop3); case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: return emitBuiltinWithOneOverloadedType<4>( *this, E, Intrinsic::amdgcn_make_buffer_rsrc); From 69b54c1a05c0c63ee28de1279b3a689b7f026e94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=98=D0=B7?= =?UTF-8?q?=D0=B2=D0=BE=D0=BB=D0=BE=D0=B2?= Date: Thu, 9 Jan 2025 21:02:35 +0300 Subject: [PATCH 19/79] [libcxx][algorithm] Optimize std::stable_sort via radix sort algorithm (#104683) The radix sort (LSD) algorithm allows to speed up std::stable_sort dramatically in case we sort integers. The speed up varies from a relatively small to x10 times, depending on type of sorted elements and the initial state of the sorted array. ``` Running ./libcxx/test/benchmarks/stable_sort.bench.out Run on (12 X 2600 MHz CPU s) CPU Caches: L1 Data 32 KiB L1 Instruction 32 KiB L2 Unified 256 KiB (x6) L3 Unified 12288 KiB Load Average: 3.48, 3.38, 3.08 --------------------------------------------------------------------------- Benchmark After Before --------------------------------------------------------------------------- BM_StableSort_int8_Random_1 3.39 ns 3.58 ns BM_StableSort_int8_Random_4 21.1 ns 21.9 ns BM_StableSort_int8_Random_16 142 ns 147 ns BM_StableSort_int8_Random_64 893 ns 903 ns BM_StableSort_int8_Random_256 409 ns 5810 ns BM_StableSort_int8_Random_1024 1235 ns 29973 ns BM_StableSort_int8_Random_4096 4410 ns 141880 ns BM_StableSort_int8_Random_16384 18044 ns 620540 ns BM_StableSort_int8_Random_65536 144030 ns 2592013 ns BM_StableSort_int8_Random_262144 858350 ns 10935814 ns BM_StableSort_int8_Random_524288 2929988 ns 27060729 ns BM_StableSort_int8_Random_1048576 6058292 ns 49622720 ns BM_StableSort_int8_Ascending_1 3.42 ns 3.92 ns BM_StableSort_int8_Ascending_4 5.86 ns 8.08 ns BM_StableSort_int8_Ascending_16 10.6 ns 12.0 ns BM_StableSort_int8_Ascending_64 28.9 ns 30.6 ns BM_StableSort_int8_Ascending_256 415 ns 391 ns BM_StableSort_int8_Ascending_1024 1666 ns 2309 ns BM_StableSort_int8_Ascending_4096 7748 ns 12269 ns BM_StableSort_int8_Ascending_16384 40588 ns 60181 ns BM_StableSort_int8_Ascending_65536 178843 ns 298221 ns BM_StableSort_int8_Ascending_262144 919959 ns 1402692 ns BM_StableSort_int8_Ascending_524288 2397397 ns 3036984 ns BM_StableSort_int8_Ascending_1048576 5080043 ns 7218581 ns BM_StableSort_int8_Descending_1 3.44 ns 3.53 ns BM_StableSort_int8_Descending_4 7.94 ns 8.29 ns BM_StableSort_int8_Descending_16 59.6 ns 57.7 ns BM_StableSort_int8_Descending_64 1051 ns 1027 ns BM_StableSort_int8_Descending_256 422 ns 4718 ns BM_StableSort_int8_Descending_1024 1676 ns 21044 ns BM_StableSort_int8_Descending_4096 7766 ns 64827 ns BM_StableSort_int8_Descending_16384 40230 ns 93981 ns BM_StableSort_int8_Descending_65536 190978 ns 421151 ns BM_StableSort_int8_Descending_262144 1055141 ns 1918927 ns BM_StableSort_int8_Descending_524288 2875115 ns 3809153 ns BM_StableSort_int8_Descending_1048576 5854135 ns 8713690 ns BM_StableSort_int8_SingleElement_1 3.52 ns 3.46 ns BM_StableSort_int8_SingleElement_4 6.25 ns 5.79 ns BM_StableSort_int8_SingleElement_16 10.7 ns 11.4 ns BM_StableSort_int8_SingleElement_64 29.3 ns 30.3 ns BM_StableSort_int8_SingleElement_256 858 ns 380 ns BM_StableSort_int8_SingleElement_1024 3036 ns 2231 ns BM_StableSort_int8_SingleElement_4096 11580 ns 11866 ns BM_StableSort_int8_SingleElement_16384 44956 ns 59621 ns BM_StableSort_int8_SingleElement_65536 182006 ns 297853 ns BM_StableSort_int8_SingleElement_262144 962181 ns 1432857 ns BM_StableSort_int8_SingleElement_524288 2256687 ns 2975707 ns BM_StableSort_int8_SingleElement_1048576 4522556 ns 6949948 ns BM_StableSort_int8_PipeOrgan_1 3.26 ns 3.64 ns BM_StableSort_int8_PipeOrgan_4 6.21 ns 6.58 ns BM_StableSort_int8_PipeOrgan_16 23.7 ns 25.4 ns BM_StableSort_int8_PipeOrgan_64 250 ns 248 ns BM_StableSort_int8_PipeOrgan_256 414 ns 2498 ns BM_StableSort_int8_PipeOrgan_1024 1697 ns 10946 ns BM_StableSort_int8_PipeOrgan_4096 7840 ns 37238 ns BM_StableSort_int8_PipeOrgan_16384 41402 ns 74805 ns BM_StableSort_int8_PipeOrgan_65536 180107 ns 357891 ns BM_StableSort_int8_PipeOrgan_262144 988273 ns 1647296 ns BM_StableSort_int8_PipeOrgan_524288 2547374 ns 3245991 ns BM_StableSort_int8_PipeOrgan_1048576 5128783 ns 7342444 ns BM_StableSort_int8_QuickSortAdversary_1 3.14 ns 4.01 ns BM_StableSort_int8_QuickSortAdversary_4 6.05 ns 7.02 ns BM_StableSort_int8_QuickSortAdversary_16 10.5 ns 11.9 ns BM_StableSort_int8_QuickSortAdversary_64 520 ns 516 ns BM_StableSort_int8_QuickSortAdversary_256 920 ns 386 ns BM_StableSort_int8_QuickSortAdversary_1024 3083 ns 2299 ns BM_StableSort_int8_QuickSortAdversary_4096 11659 ns 12295 ns BM_StableSort_int8_QuickSortAdversary_16384 45721 ns 60931 ns BM_StableSort_int8_QuickSortAdversary_65536 186334 ns 295423 ns BM_StableSort_int8_QuickSortAdversary_262144 946262 ns 1399973 ns BM_StableSort_int8_QuickSortAdversary_524288 2282004 ns 2832266 ns BM_StableSort_int8_QuickSortAdversary_1048576 4691123 ns 6963253 ns BM_StableSort_uint8_Random_1 3.11 ns 3.44 ns BM_StableSort_uint8_Random_4 21.9 ns 23.1 ns BM_StableSort_uint8_Random_16 154 ns 171 ns BM_StableSort_uint8_Random_64 1000 ns 1051 ns BM_StableSort_uint8_Random_256 402 ns 6498 ns BM_StableSort_uint8_Random_1024 1176 ns 35310 ns BM_StableSort_uint8_Random_4096 4415 ns 164087 ns BM_StableSort_uint8_Random_16384 17849 ns 686769 ns BM_StableSort_uint8_Random_65536 146109 ns 2932051 ns BM_StableSort_uint8_Random_262144 876710 ns 12163988 ns BM_StableSort_uint8_Random_524288 2858089 ns 26458830 ns BM_StableSort_uint8_Random_1048576 5766942 ns 54836214 ns BM_StableSort_uint8_Ascending_1 3.11 ns 3.43 ns BM_StableSort_uint8_Ascending_4 6.18 ns 7.24 ns BM_StableSort_uint8_Ascending_16 14.5 ns 17.0 ns BM_StableSort_uint8_Ascending_64 50.7 ns 59.2 ns BM_StableSort_uint8_Ascending_256 395 ns 536 ns BM_StableSort_uint8_Ascending_1024 1752 ns 2956 ns BM_StableSort_uint8_Ascending_4096 7785 ns 15146 ns BM_StableSort_uint8_Ascending_16384 41442 ns 74136 ns BM_StableSort_uint8_Ascending_65536 180879 ns 354261 ns BM_StableSort_uint8_Ascending_262144 945880 ns 1674256 ns BM_StableSort_uint8_Ascending_524288 2287832 ns 3138581 ns BM_StableSort_uint8_Ascending_1048576 4630290 ns 7296278 ns BM_StableSort_uint8_Descending_1 3.19 ns 3.63 ns BM_StableSort_uint8_Descending_4 9.60 ns 11.5 ns BM_StableSort_uint8_Descending_16 78.3 ns 86.0 ns BM_StableSort_uint8_Descending_64 1265 ns 1308 ns BM_StableSort_uint8_Descending_256 395 ns 6556 ns BM_StableSort_uint8_Descending_1024 1712 ns 24669 ns BM_StableSort_uint8_Descending_4096 7748 ns 83407 ns BM_StableSort_uint8_Descending_16384 40779 ns 104043 ns BM_StableSort_uint8_Descending_65536 181560 ns 467680 ns BM_StableSort_uint8_Descending_262144 1146627 ns 2102769 ns BM_StableSort_uint8_Descending_524288 2874096 ns 4572229 ns BM_StableSort_uint8_Descending_1048576 5873195 ns 10170663 ns BM_StableSort_uint8_SingleElement_1 3.28 ns 3.58 ns BM_StableSort_uint8_SingleElement_4 6.44 ns 7.40 ns BM_StableSort_uint8_SingleElement_16 14.9 ns 16.4 ns BM_StableSort_uint8_SingleElement_64 51.2 ns 52.9 ns BM_StableSort_uint8_SingleElement_256 876 ns 490 ns BM_StableSort_uint8_SingleElement_1024 3041 ns 2750 ns BM_StableSort_uint8_SingleElement_4096 11947 ns 14326 ns BM_StableSort_uint8_SingleElement_16384 46669 ns 69984 ns BM_StableSort_uint8_SingleElement_65536 197903 ns 328961 ns BM_StableSort_uint8_SingleElement_262144 1031466 ns 1551436 ns BM_StableSort_uint8_SingleElement_524288 2447672 ns 3049553 ns BM_StableSort_uint8_SingleElement_1048576 4793087 ns 7615245 ns BM_StableSort_uint8_PipeOrgan_1 3.38 ns 3.56 ns BM_StableSort_uint8_PipeOrgan_4 7.16 ns 8.70 ns BM_StableSort_uint8_PipeOrgan_16 31.7 ns 35.3 ns BM_StableSort_uint8_PipeOrgan_64 326 ns 366 ns BM_StableSort_uint8_PipeOrgan_256 409 ns 2942 ns BM_StableSort_uint8_PipeOrgan_1024 1994 ns 12571 ns BM_StableSort_uint8_PipeOrgan_4096 8086 ns 46278 ns BM_StableSort_uint8_PipeOrgan_16384 41749 ns 79813 ns BM_StableSort_uint8_PipeOrgan_65536 180697 ns 375120 ns BM_StableSort_uint8_PipeOrgan_262144 1004899 ns 1676143 ns BM_StableSort_uint8_PipeOrgan_524288 2456081 ns 3333949 ns BM_StableSort_uint8_PipeOrgan_1048576 5030857 ns 7591303 ns BM_StableSort_uint8_QuickSortAdversary_1 3.12 ns 3.46 ns BM_StableSort_uint8_QuickSortAdversary_4 7.25 ns 6.83 ns BM_StableSort_uint8_QuickSortAdversary_16 14.6 ns 16.2 ns BM_StableSort_uint8_QuickSortAdversary_64 650 ns 665 ns BM_StableSort_uint8_QuickSortAdversary_256 395 ns 2982 ns BM_StableSort_uint8_QuickSortAdversary_1024 3125 ns 2583 ns BM_StableSort_uint8_QuickSortAdversary_4096 11797 ns 13929 ns BM_StableSort_uint8_QuickSortAdversary_16384 45803 ns 66513 ns BM_StableSort_uint8_QuickSortAdversary_65536 190745 ns 313467 ns BM_StableSort_uint8_QuickSortAdversary_262144 974646 ns 1469014 ns BM_StableSort_uint8_QuickSortAdversary_524288 2317553 ns 3022065 ns BM_StableSort_uint8_QuickSortAdversary_1048576 4898703 ns 6854079 ns BM_StableSort_int16_Random_1 3.94 ns 3.49 ns BM_StableSort_int16_Random_4 20.8 ns 23.2 ns BM_StableSort_int16_Random_16 133 ns 163 ns BM_StableSort_int16_Random_64 903 ns 953 ns BM_StableSort_int16_Random_256 5638 ns 6258 ns BM_StableSort_int16_Random_1024 3056 ns 34587 ns BM_StableSort_int16_Random_4096 10596 ns 168397 ns BM_StableSort_int16_Random_16384 49908 ns 753031 ns BM_StableSort_int16_Random_65536 444605 ns 3838368 ns BM_StableSort_int16_Random_262144 2419345 ns 15657285 ns BM_StableSort_int16_Random_524288 7984040 ns 32726933 ns BM_StableSort_int16_Random_1048576 16092424 ns 67999766 ns BM_StableSort_int16_Ascending_1 3.40 ns 3.43 ns BM_StableSort_int16_Ascending_4 5.45 ns 5.79 ns BM_StableSort_int16_Ascending_16 12.0 ns 15.3 ns BM_StableSort_int16_Ascending_64 39.6 ns 52.6 ns BM_StableSort_int16_Ascending_256 470 ns 550 ns BM_StableSort_int16_Ascending_1024 1686 ns 2707 ns BM_StableSort_int16_Ascending_4096 5676 ns 14165 ns BM_StableSort_int16_Ascending_16384 21413 ns 69483 ns BM_StableSort_int16_Ascending_65536 88010 ns 334466 ns BM_StableSort_int16_Ascending_262144 567239 ns 1570620 ns BM_StableSort_int16_Ascending_524288 1553063 ns 3424666 ns BM_StableSort_int16_Ascending_1048576 3145577 ns 8499649 ns BM_StableSort_int16_Descending_1 3.22 ns 3.54 ns BM_StableSort_int16_Descending_4 6.85 ns 10.2 ns BM_StableSort_int16_Descending_16 62.7 ns 62.2 ns BM_StableSort_int16_Descending_64 1138 ns 1036 ns BM_StableSort_int16_Descending_256 5541 ns 4696 ns BM_StableSort_int16_Descending_1024 3046 ns 19577 ns BM_StableSort_int16_Descending_4096 10962 ns 79149 ns BM_StableSort_int16_Descending_16384 58182 ns 327709 ns BM_StableSort_int16_Descending_65536 447025 ns 1424896 ns BM_StableSort_int16_Descending_262144 1104973 ns 5921903 ns BM_StableSort_int16_Descending_524288 2547840 ns 17956789 ns BM_StableSort_int16_Descending_1048576 5093555 ns 17044318 ns BM_StableSort_int16_SingleElement_1 3.56 ns 3.96 ns BM_StableSort_int16_SingleElement_4 5.75 ns 6.72 ns BM_StableSort_int16_SingleElement_16 12.4 ns 16.1 ns BM_StableSort_int16_SingleElement_64 36.9 ns 54.4 ns BM_StableSort_int16_SingleElement_256 473 ns 557 ns BM_StableSort_int16_SingleElement_1024 1828 ns 2826 ns BM_StableSort_int16_SingleElement_4096 6239 ns 14252 ns BM_StableSort_int16_SingleElement_16384 23695 ns 70369 ns BM_StableSort_int16_SingleElement_65536 93281 ns 361641 ns BM_StableSort_int16_SingleElement_262144 599078 ns 1640216 ns BM_StableSort_int16_SingleElement_524288 1659678 ns 3343087 ns BM_StableSort_int16_SingleElement_1048576 3184033 ns 7770271 ns BM_StableSort_int16_PipeOrgan_1 3.75 ns 3.76 ns BM_StableSort_int16_PipeOrgan_4 5.94 ns 7.74 ns BM_StableSort_int16_PipeOrgan_16 26.7 ns 25.9 ns BM_StableSort_int16_PipeOrgan_64 300 ns 263 ns BM_StableSort_int16_PipeOrgan_256 2769 ns 2760 ns BM_StableSort_int16_PipeOrgan_1024 2996 ns 10544 ns BM_StableSort_int16_PipeOrgan_4096 11641 ns 44750 ns BM_StableSort_int16_PipeOrgan_16384 57224 ns 200464 ns BM_StableSort_int16_PipeOrgan_65536 416873 ns 887631 ns BM_StableSort_int16_PipeOrgan_262144 843264 ns 3588669 ns BM_StableSort_int16_PipeOrgan_524288 2027741 ns 11056924 ns BM_StableSort_int16_PipeOrgan_1048576 4223773 ns 13261276 ns BM_StableSort_int16_QuickSortAdversary_1 3.83 ns 3.68 ns BM_StableSort_int16_QuickSortAdversary_4 5.55 ns 6.93 ns BM_StableSort_int16_QuickSortAdversary_16 12.3 ns 15.2 ns BM_StableSort_int16_QuickSortAdversary_64 646 ns 632 ns BM_StableSort_int16_QuickSortAdversary_256 2751 ns 2542 ns BM_StableSort_int16_QuickSortAdversary_1024 3028 ns 16901 ns BM_StableSort_int16_QuickSortAdversary_4096 10862 ns 80222 ns BM_StableSort_int16_QuickSortAdversary_16384 57753 ns 317281 ns BM_StableSort_int16_QuickSortAdversary_65536 94064 ns 328502 ns BM_StableSort_int16_QuickSortAdversary_262144 557796 ns 1613208 ns BM_StableSort_int16_QuickSortAdversary_524288 1518451 ns 3479740 ns BM_StableSort_int16_QuickSortAdversary_1048576 3165129 ns 7655880 ns BM_StableSort_uint16_Random_1 3.26 ns 3.44 ns BM_StableSort_uint16_Random_4 21.1 ns 22.2 ns BM_StableSort_uint16_Random_16 157 ns 156 ns BM_StableSort_uint16_Random_64 955 ns 947 ns BM_StableSort_uint16_Random_256 5886 ns 6097 ns BM_StableSort_uint16_Random_1024 2787 ns 30776 ns BM_StableSort_uint16_Random_4096 9973 ns 155652 ns BM_StableSort_uint16_Random_16384 48628 ns 741072 ns BM_StableSort_uint16_Random_65536 439609 ns 3478966 ns BM_StableSort_uint16_Random_262144 2336983 ns 15197642 ns BM_StableSort_uint16_Random_524288 7888701 ns 34234254 ns BM_StableSort_uint16_Random_1048576 14865180 ns 68516386 ns BM_StableSort_uint16_Ascending_1 3.33 ns 4.00 ns BM_StableSort_uint16_Ascending_4 5.79 ns 6.64 ns BM_StableSort_uint16_Ascending_16 14.9 ns 15.5 ns BM_StableSort_uint16_Ascending_64 50.2 ns 52.5 ns BM_StableSort_uint16_Ascending_256 538 ns 546 ns BM_StableSort_uint16_Ascending_1024 1645 ns 2652 ns BM_StableSort_uint16_Ascending_4096 5559 ns 14517 ns BM_StableSort_uint16_Ascending_16384 22803 ns 70275 ns BM_StableSort_uint16_Ascending_65536 83109 ns 333446 ns BM_StableSort_uint16_Ascending_262144 562667 ns 1568670 ns BM_StableSort_uint16_Ascending_524288 1564646 ns 3059839 ns BM_StableSort_uint16_Ascending_1048576 3178826 ns 7048327 ns BM_StableSort_uint16_Descending_1 3.34 ns 3.93 ns BM_StableSort_uint16_Descending_4 8.75 ns 9.73 ns BM_StableSort_uint16_Descending_16 55.9 ns 55.5 ns BM_StableSort_uint16_Descending_64 1021 ns 1035 ns BM_StableSort_uint16_Descending_256 4752 ns 4931 ns BM_StableSort_uint16_Descending_1024 2982 ns 19727 ns BM_StableSort_uint16_Descending_4096 10432 ns 83165 ns BM_StableSort_uint16_Descending_16384 56593 ns 326131 ns BM_StableSort_uint16_Descending_65536 439134 ns 1371346 ns BM_StableSort_uint16_Descending_262144 1220925 ns 5735665 ns BM_StableSort_uint16_Descending_524288 2767234 ns 16758330 ns BM_StableSort_uint16_Descending_1048576 5673769 ns 17541715 ns BM_StableSort_uint16_SingleElement_1 3.53 ns 3.73 ns BM_StableSort_uint16_SingleElement_4 6.27 ns 5.81 ns BM_StableSort_uint16_SingleElement_16 14.8 ns 15.1 ns BM_StableSort_uint16_SingleElement_64 51.5 ns 50.9 ns BM_StableSort_uint16_SingleElement_256 536 ns 540 ns BM_StableSort_uint16_SingleElement_1024 1669 ns 2690 ns BM_StableSort_uint16_SingleElement_4096 5840 ns 14230 ns BM_StableSort_uint16_SingleElement_16384 22468 ns 68524 ns BM_StableSort_uint16_SingleElement_65536 89845 ns 332187 ns BM_StableSort_uint16_SingleElement_262144 590736 ns 1550868 ns BM_StableSort_uint16_SingleElement_524288 1573677 ns 3095703 ns BM_StableSort_uint16_SingleElement_1048576 3183421 ns 8251180 ns BM_StableSort_uint16_PipeOrgan_1 3.70 ns 3.64 ns BM_StableSort_uint16_PipeOrgan_4 7.01 ns 6.81 ns BM_StableSort_uint16_PipeOrgan_16 25.7 ns 26.4 ns BM_StableSort_uint16_PipeOrgan_64 283 ns 277 ns BM_StableSort_uint16_PipeOrgan_256 2562 ns 2852 ns BM_StableSort_uint16_PipeOrgan_1024 2863 ns 10892 ns BM_StableSort_uint16_PipeOrgan_4096 10585 ns 45668 ns BM_StableSort_uint16_PipeOrgan_16384 59151 ns 194358 ns BM_StableSort_uint16_PipeOrgan_65536 508579 ns 854692 ns BM_StableSort_uint16_PipeOrgan_262144 901294 ns 3606346 ns BM_StableSort_uint16_PipeOrgan_524288 2192498 ns 10449279 ns BM_StableSort_uint16_PipeOrgan_1048576 4204368 ns 11956606 ns BM_StableSort_uint16_QuickSortAdversary_1 3.20 ns 3.63 ns BM_StableSort_uint16_QuickSortAdversary_4 5.30 ns 6.38 ns BM_StableSort_uint16_QuickSortAdversary_16 14.5 ns 15.3 ns BM_StableSort_uint16_QuickSortAdversary_64 575 ns 611 ns BM_StableSort_uint16_QuickSortAdversary_256 2423 ns 2577 ns BM_StableSort_uint16_QuickSortAdversary_1024 2794 ns 16854 ns BM_StableSort_uint16_QuickSortAdversary_4096 10511 ns 75952 ns BM_StableSort_uint16_QuickSortAdversary_16384 56214 ns 333824 ns BM_StableSort_uint16_QuickSortAdversary_65536 422512 ns 1354867 ns BM_StableSort_uint16_QuickSortAdversary_262144 583301 ns 1564443 ns BM_StableSort_uint16_QuickSortAdversary_524288 1584319 ns 3265575 ns BM_StableSort_uint16_QuickSortAdversary_1048576 3197732 ns 7945245 ns BM_StableSort_int32_Random_1 3.81 ns 3.70 ns BM_StableSort_int32_Random_4 20.8 ns 23.4 ns BM_StableSort_int32_Random_16 134 ns 161 ns BM_StableSort_int32_Random_64 895 ns 984 ns BM_StableSort_int32_Random_256 5640 ns 5897 ns BM_StableSort_int32_Random_1024 6994 ns 32118 ns BM_StableSort_int32_Random_4096 27367 ns 168960 ns BM_StableSort_int32_Random_16384 183261 ns 843240 ns BM_StableSort_int32_Random_65536 950914 ns 3953588 ns BM_StableSort_int32_Random_262144 3673311 ns 16790171 ns BM_StableSort_int32_Random_524288 11515700 ns 36023098 ns BM_StableSort_int32_Random_1048576 24492515 ns 78116028 ns BM_StableSort_int32_Ascending_1 3.31 ns 4.48 ns BM_StableSort_int32_Ascending_4 5.96 ns 6.99 ns BM_StableSort_int32_Ascending_16 13.0 ns 16.0 ns BM_StableSort_int32_Ascending_64 36.7 ns 53.0 ns BM_StableSort_int32_Ascending_256 391 ns 471 ns BM_StableSort_int32_Ascending_1024 2705 ns 2682 ns BM_StableSort_int32_Ascending_4096 8773 ns 14231 ns BM_StableSort_int32_Ascending_16384 34709 ns 70625 ns BM_StableSort_int32_Ascending_65536 142907 ns 344482 ns BM_StableSort_int32_Ascending_262144 745483 ns 1591418 ns BM_StableSort_int32_Ascending_524288 1873701 ns 3190305 ns BM_StableSort_int32_Ascending_1048576 3851590 ns 7570095 ns BM_StableSort_int32_Descending_1 3.22 ns 4.23 ns BM_StableSort_int32_Descending_4 7.58 ns 11.2 ns BM_StableSort_int32_Descending_16 63.9 ns 58.6 ns BM_StableSort_int32_Descending_64 1133 ns 1017 ns BM_StableSort_int32_Descending_256 4850 ns 4464 ns BM_StableSort_int32_Descending_1024 7023 ns 18954 ns BM_StableSort_int32_Descending_4096 28550 ns 75163 ns BM_StableSort_int32_Descending_16384 200880 ns 341104 ns BM_StableSort_int32_Descending_65536 1095910 ns 1398021 ns BM_StableSort_int32_Descending_262144 3818864 ns 5695486 ns BM_StableSort_int32_Descending_524288 5606779 ns 17593982 ns BM_StableSort_int32_Descending_1048576 16416366 ns 26649503 ns BM_StableSort_int32_SingleElement_1 3.81 ns 3.71 ns BM_StableSort_int32_SingleElement_4 6.57 ns 6.61 ns BM_StableSort_int32_SingleElement_16 14.0 ns 15.8 ns BM_StableSort_int32_SingleElement_64 38.7 ns 53.5 ns BM_StableSort_int32_SingleElement_256 386 ns 554 ns BM_StableSort_int32_SingleElement_1024 2761 ns 3046 ns BM_StableSort_int32_SingleElement_4096 9179 ns 15188 ns BM_StableSort_int32_SingleElement_16384 34794 ns 70119 ns BM_StableSort_int32_SingleElement_65536 135190 ns 354755 ns BM_StableSort_int32_SingleElement_262144 760995 ns 1644072 ns BM_StableSort_int32_SingleElement_524288 1969575 ns 3343419 ns BM_StableSort_int32_SingleElement_1048576 4423816 ns 8346971 ns BM_StableSort_int32_PipeOrgan_1 3.79 ns 3.63 ns BM_StableSort_int32_PipeOrgan_4 6.21 ns 6.73 ns BM_StableSort_int32_PipeOrgan_16 27.5 ns 26.0 ns BM_StableSort_int32_PipeOrgan_64 291 ns 265 ns BM_StableSort_int32_PipeOrgan_256 2557 ns 2518 ns BM_StableSort_int32_PipeOrgan_1024 6765 ns 10976 ns BM_StableSort_int32_PipeOrgan_4096 26373 ns 44537 ns BM_StableSort_int32_PipeOrgan_16384 201466 ns 188582 ns BM_StableSort_int32_PipeOrgan_65536 1148533 ns 802368 ns BM_StableSort_int32_PipeOrgan_262144 2255177 ns 3477829 ns BM_StableSort_int32_PipeOrgan_524288 3947015 ns 10356637 ns BM_StableSort_int32_PipeOrgan_1048576 10274312 ns 16405366 ns BM_StableSort_int32_QuickSortAdversary_1 3.32 ns 4.36 ns BM_StableSort_int32_QuickSortAdversary_4 5.98 ns 7.44 ns BM_StableSort_int32_QuickSortAdversary_16 13.0 ns 16.3 ns BM_StableSort_int32_QuickSortAdversary_64 657 ns 616 ns BM_StableSort_int32_QuickSortAdversary_256 2569 ns 2483 ns BM_StableSort_int32_QuickSortAdversary_1024 6898 ns 19635 ns BM_StableSort_int32_QuickSortAdversary_4096 27092 ns 75108 ns BM_StableSort_int32_QuickSortAdversary_16384 190379 ns 316463 ns BM_StableSort_int32_QuickSortAdversary_65536 1109040 ns 1319018 ns BM_StableSort_int32_QuickSortAdversary_262144 4361925 ns 5472779 ns BM_StableSort_int32_QuickSortAdversary_524288 6528215 ns 17538983 ns BM_StableSort_int32_QuickSortAdversary_1048576 18345325 ns 27223926 ns BM_StableSort_uint32_Random_1 3.67 ns 3.82 ns BM_StableSort_uint32_Random_4 22.3 ns 21.8 ns BM_StableSort_uint32_Random_16 155 ns 153 ns BM_StableSort_uint32_Random_64 946 ns 976 ns BM_StableSort_uint32_Random_256 5824 ns 6019 ns BM_StableSort_uint32_Random_1024 4525 ns 32764 ns BM_StableSort_uint32_Random_4096 17223 ns 158608 ns BM_StableSort_uint32_Random_16384 134821 ns 748525 ns BM_StableSort_uint32_Random_65536 716644 ns 3453325 ns BM_StableSort_uint32_Random_262144 3628062 ns 16065414 ns BM_StableSort_uint32_Random_524288 10971334 ns 36567712 ns BM_StableSort_uint32_Random_1048576 22688377 ns 77533497 ns BM_StableSort_uint32_Ascending_1 3.57 ns 3.44 ns BM_StableSort_uint32_Ascending_4 5.73 ns 5.33 ns BM_StableSort_uint32_Ascending_16 14.5 ns 14.0 ns BM_StableSort_uint32_Ascending_64 50.3 ns 51.3 ns BM_StableSort_uint32_Ascending_256 465 ns 467 ns BM_StableSort_uint32_Ascending_1024 3042 ns 2530 ns BM_StableSort_uint32_Ascending_4096 9842 ns 12207 ns BM_StableSort_uint32_Ascending_16384 37994 ns 61726 ns BM_StableSort_uint32_Ascending_65536 148890 ns 294385 ns BM_StableSort_uint32_Ascending_262144 855080 ns 1422167 ns BM_StableSort_uint32_Ascending_524288 2154903 ns 3203018 ns BM_StableSort_uint32_Ascending_1048576 5002518 ns 7563817 ns BM_StableSort_uint32_Descending_1 3.51 ns 3.40 ns BM_StableSort_uint32_Descending_4 9.09 ns 7.95 ns BM_StableSort_uint32_Descending_16 54.8 ns 74.4 ns BM_StableSort_uint32_Descending_64 1003 ns 1305 ns BM_StableSort_uint32_Descending_256 4545 ns 5300 ns BM_StableSort_uint32_Descending_1024 4361 ns 21884 ns BM_StableSort_uint32_Descending_4096 16018 ns 90534 ns BM_StableSort_uint32_Descending_16384 146274 ns 381943 ns BM_StableSort_uint32_Descending_65536 938248 ns 1536806 ns BM_StableSort_uint32_Descending_262144 3899300 ns 6387843 ns BM_StableSort_uint32_Descending_524288 5808157 ns 21959858 ns BM_StableSort_uint32_Descending_1048576 17520047 ns 26351912 ns BM_StableSort_uint32_SingleElement_1 4.03 ns 3.97 ns BM_StableSort_uint32_SingleElement_4 6.55 ns 6.41 ns BM_StableSort_uint32_SingleElement_16 15.6 ns 15.8 ns BM_StableSort_uint32_SingleElement_64 52.3 ns 58.7 ns BM_StableSort_uint32_SingleElement_256 473 ns 485 ns BM_StableSort_uint32_SingleElement_1024 3020 ns 2407 ns BM_StableSort_uint32_SingleElement_4096 9998 ns 12527 ns BM_StableSort_uint32_SingleElement_16384 38072 ns 62228 ns BM_StableSort_uint32_SingleElement_65536 153706 ns 295662 ns BM_StableSort_uint32_SingleElement_262144 836532 ns 1477099 ns BM_StableSort_uint32_SingleElement_524288 2144900 ns 3157204 ns BM_StableSort_uint32_SingleElement_1048576 4995525 ns 7617233 ns BM_StableSort_uint32_PipeOrgan_1 4.02 ns 3.99 ns BM_StableSort_uint32_PipeOrgan_4 6.97 ns 6.84 ns BM_StableSort_uint32_PipeOrgan_16 26.1 ns 29.7 ns BM_StableSort_uint32_PipeOrgan_64 266 ns 333 ns BM_StableSort_uint32_PipeOrgan_256 2462 ns 2892 ns BM_StableSort_uint32_PipeOrgan_1024 4291 ns 12431 ns BM_StableSort_uint32_PipeOrgan_4096 15638 ns 51449 ns BM_StableSort_uint32_PipeOrgan_16384 154563 ns 217460 ns BM_StableSort_uint32_PipeOrgan_65536 907724 ns 925873 ns BM_StableSort_uint32_PipeOrgan_262144 2394580 ns 4103575 ns BM_StableSort_uint32_PipeOrgan_524288 4177145 ns 13947158 ns BM_StableSort_uint32_PipeOrgan_1048576 11848224 ns 18807297 ns BM_StableSort_uint32_QuickSortAdversary_1 3.50 ns 3.43 ns BM_StableSort_uint32_QuickSortAdversary_4 5.88 ns 4.96 ns BM_StableSort_uint32_QuickSortAdversary_16 14.6 ns 14.0 ns BM_StableSort_uint32_QuickSortAdversary_64 576 ns 715 ns BM_StableSort_uint32_QuickSortAdversary_256 2353 ns 2797 ns BM_StableSort_uint32_QuickSortAdversary_1024 4176 ns 21775 ns BM_StableSort_uint32_QuickSortAdversary_4096 15565 ns 96188 ns BM_StableSort_uint32_QuickSortAdversary_16384 149092 ns 398332 ns BM_StableSort_uint32_QuickSortAdversary_65536 902488 ns 1552393 ns BM_StableSort_uint32_QuickSortAdversary_262144 3946517 ns 6560414 ns BM_StableSort_uint32_QuickSortAdversary_524288 6247114 ns 22420977 ns BM_StableSort_uint32_QuickSortAdversary_1048576 19892446 ns 26529576 ns BM_StableSort_int64_Random_1 3.83 ns 3.98 ns BM_StableSort_int64_Random_4 21.1 ns 24.0 ns BM_StableSort_int64_Random_16 129 ns 136 ns BM_StableSort_int64_Random_64 890 ns 906 ns BM_StableSort_int64_Random_256 5542 ns 5901 ns BM_StableSort_int64_Random_1024 16085 ns 33112 ns BM_StableSort_int64_Random_4096 63895 ns 162181 ns BM_StableSort_int64_Random_16384 348827 ns 790045 ns BM_StableSort_int64_Random_65536 1488237 ns 3557506 ns BM_StableSort_int64_Random_262144 8195713 ns 16315808 ns BM_StableSort_int64_Random_524288 16586833 ns 38274075 ns BM_StableSort_int64_Random_1048576 40346644 ns 79182089 ns BM_StableSort_int64_Ascending_1 3.76 ns 3.55 ns BM_StableSort_int64_Ascending_4 5.82 ns 6.19 ns BM_StableSort_int64_Ascending_16 11.7 ns 11.8 ns BM_StableSort_int64_Ascending_64 32.9 ns 36.8 ns BM_StableSort_int64_Ascending_256 415 ns 550 ns BM_StableSort_int64_Ascending_1024 5352 ns 3347 ns BM_StableSort_int64_Ascending_4096 17516 ns 19134 ns BM_StableSort_int64_Ascending_16384 64147 ns 91099 ns BM_StableSort_int64_Ascending_65536 322126 ns 434009 ns BM_StableSort_int64_Ascending_262144 1554669 ns 2057056 ns BM_StableSort_int64_Ascending_524288 3656527 ns 5016650 ns BM_StableSort_int64_Ascending_1048576 10469979 ns 12908613 ns BM_StableSort_int64_Descending_1 4.09 ns 3.35 ns BM_StableSort_int64_Descending_4 9.13 ns 8.01 ns BM_StableSort_int64_Descending_16 76.8 ns 92.9 ns BM_StableSort_int64_Descending_64 1336 ns 1417 ns BM_StableSort_int64_Descending_256 5525 ns 5674 ns BM_StableSort_int64_Descending_1024 17461 ns 22558 ns BM_StableSort_int64_Descending_4096 64285 ns 102360 ns BM_StableSort_int64_Descending_16384 336946 ns 388940 ns BM_StableSort_int64_Descending_65536 837912 ns 1662169 ns BM_StableSort_int64_Descending_262144 3680806 ns 7494323 ns BM_StableSort_int64_Descending_524288 11023784 ns 24935033 ns BM_StableSort_int64_Descending_1048576 20023568 ns 33220712 ns BM_StableSort_int64_SingleElement_1 3.37 ns 3.98 ns BM_StableSort_int64_SingleElement_4 5.32 ns 6.92 ns BM_StableSort_int64_SingleElement_16 10.9 ns 13.3 ns BM_StableSort_int64_SingleElement_64 32.1 ns 43.8 ns BM_StableSort_int64_SingleElement_256 420 ns 541 ns BM_StableSort_int64_SingleElement_1024 5689 ns 3381 ns BM_StableSort_int64_SingleElement_4096 19199 ns 17989 ns BM_StableSort_int64_SingleElement_16384 75754 ns 91963 ns BM_StableSort_int64_SingleElement_65536 357106 ns 500326 ns BM_StableSort_int64_SingleElement_262144 1672975 ns 2417734 ns BM_StableSort_int64_SingleElement_524288 3642891 ns 5200878 ns BM_StableSort_int64_SingleElement_1048576 11172007 ns 13729511 ns BM_StableSort_int64_PipeOrgan_1 3.38 ns 3.94 ns BM_StableSort_int64_PipeOrgan_4 5.73 ns 6.44 ns BM_StableSort_int64_PipeOrgan_16 27.5 ns 29.0 ns BM_StableSort_int64_PipeOrgan_64 310 ns 321 ns BM_StableSort_int64_PipeOrgan_256 2761 ns 2918 ns BM_StableSort_int64_PipeOrgan_1024 16105 ns 12525 ns BM_StableSort_int64_PipeOrgan_4096 65289 ns 59990 ns BM_StableSort_int64_PipeOrgan_16384 341757 ns 270636 ns BM_StableSort_int64_PipeOrgan_65536 587452 ns 1126132 ns BM_StableSort_int64_PipeOrgan_262144 2837955 ns 5034180 ns BM_StableSort_int64_PipeOrgan_524288 6617313 ns 15267354 ns BM_StableSort_int64_PipeOrgan_1048576 15208796 ns 23162989 ns BM_StableSort_int64_QuickSortAdversary_1 3.77 ns 3.45 ns BM_StableSort_int64_QuickSortAdversary_4 5.55 ns 5.20 ns BM_StableSort_int64_QuickSortAdversary_16 12.5 ns 11.5 ns BM_StableSort_int64_QuickSortAdversary_64 646 ns 750 ns BM_StableSort_int64_QuickSortAdversary_256 2655 ns 3539 ns BM_StableSort_int64_QuickSortAdversary_1024 16373 ns 22349 ns BM_StableSort_int64_QuickSortAdversary_4096 62306 ns 97248 ns BM_StableSort_int64_QuickSortAdversary_16384 321755 ns 388084 ns BM_StableSort_int64_QuickSortAdversary_65536 1374694 ns 1596091 ns BM_StableSort_int64_QuickSortAdversary_262144 4374661 ns 6894139 ns BM_StableSort_int64_QuickSortAdversary_524288 12736074 ns 23932229 ns BM_StableSort_int64_QuickSortAdversary_1048576 22615219 ns 33355629 ns BM_StableSort_uint64_Random_1 3.82 ns 3.49 ns BM_StableSort_uint64_Random_4 22.4 ns 23.4 ns BM_StableSort_uint64_Random_16 154 ns 146 ns BM_StableSort_uint64_Random_64 924 ns 926 ns BM_StableSort_uint64_Random_256 5864 ns 5913 ns BM_StableSort_uint64_Random_1024 7168 ns 31746 ns BM_StableSort_uint64_Random_4096 27668 ns 154224 ns BM_StableSort_uint64_Random_16384 219526 ns 755205 ns BM_StableSort_uint64_Random_65536 965251 ns 3490165 ns BM_StableSort_uint64_Random_262144 6262162 ns 15889589 ns BM_StableSort_uint64_Random_524288 12530078 ns 36458581 ns BM_StableSort_uint64_Random_1048576 38462191 ns 75168445 ns BM_StableSort_uint64_Ascending_1 3.30 ns 3.35 ns BM_StableSort_uint64_Ascending_4 5.65 ns 5.84 ns BM_StableSort_uint64_Ascending_16 14.7 ns 12.6 ns BM_StableSort_uint64_Ascending_64 55.3 ns 34.6 ns BM_StableSort_uint64_Ascending_256 513 ns 533 ns BM_StableSort_uint64_Ascending_1024 5541 ns 3189 ns BM_StableSort_uint64_Ascending_4096 17706 ns 20326 ns BM_StableSort_uint64_Ascending_16384 66420 ns 93757 ns BM_StableSort_uint64_Ascending_65536 341425 ns 435016 ns BM_StableSort_uint64_Ascending_262144 1595691 ns 2088317 ns BM_StableSort_uint64_Ascending_524288 3808703 ns 5092832 ns BM_StableSort_uint64_Ascending_1048576 11060417 ns 13023250 ns BM_StableSort_uint64_Descending_1 3.29 ns 3.35 ns BM_StableSort_uint64_Descending_4 8.65 ns 7.92 ns BM_StableSort_uint64_Descending_16 54.7 ns 80.2 ns BM_StableSort_uint64_Descending_64 1028 ns 1307 ns BM_StableSort_uint64_Descending_256 4521 ns 5635 ns BM_StableSort_uint64_Descending_1024 7122 ns 23323 ns BM_StableSort_uint64_Descending_4096 30538 ns 95892 ns BM_StableSort_uint64_Descending_16384 195565 ns 392721 ns BM_StableSort_uint64_Descending_65536 852002 ns 1720358 ns BM_StableSort_uint64_Descending_262144 3737884 ns 7484130 ns BM_StableSort_uint64_Descending_524288 11159345 ns 25690770 ns BM_StableSort_uint64_Descending_1048576 20648864 ns 33057383 ns BM_StableSort_uint64_SingleElement_1 3.62 ns 4.10 ns BM_StableSort_uint64_SingleElement_4 6.73 ns 6.64 ns BM_StableSort_uint64_SingleElement_16 14.9 ns 11.3 ns BM_StableSort_uint64_SingleElement_64 52.0 ns 33.0 ns BM_StableSort_uint64_SingleElement_256 511 ns 582 ns BM_StableSort_uint64_SingleElement_1024 6499 ns 3287 ns BM_StableSort_uint64_SingleElement_4096 22190 ns 17616 ns BM_StableSort_uint64_SingleElement_16384 84378 ns 86885 ns BM_StableSort_uint64_SingleElement_65536 466257 ns 457144 ns BM_StableSort_uint64_SingleElement_262144 1993687 ns 2361999 ns BM_StableSort_uint64_SingleElement_524288 4759565 ns 5096771 ns BM_StableSort_uint64_SingleElement_1048576 12426111 ns 13468453 ns BM_StableSort_uint64_PipeOrgan_1 3.73 ns 3.94 ns BM_StableSort_uint64_PipeOrgan_4 7.18 ns 7.54 ns BM_StableSort_uint64_PipeOrgan_16 25.2 ns 29.1 ns BM_StableSort_uint64_PipeOrgan_64 260 ns 321 ns BM_StableSort_uint64_PipeOrgan_256 2468 ns 2970 ns BM_StableSort_uint64_PipeOrgan_1024 7025 ns 12912 ns BM_StableSort_uint64_PipeOrgan_4096 28968 ns 53379 ns BM_StableSort_uint64_PipeOrgan_16384 194156 ns 239790 ns BM_StableSort_uint64_PipeOrgan_65536 599491 ns 993800 ns BM_StableSort_uint64_PipeOrgan_262144 2648585 ns 4689680 ns BM_StableSort_uint64_PipeOrgan_524288 7621109 ns 15401808 ns BM_StableSort_uint64_PipeOrgan_1048576 15608814 ns 23484821 ns BM_StableSort_uint64_QuickSortAdversary_1 3.38 ns 3.54 ns BM_StableSort_uint64_QuickSortAdversary_4 5.50 ns 6.03 ns BM_StableSort_uint64_QuickSortAdversary_16 14.2 ns 11.0 ns BM_StableSort_uint64_QuickSortAdversary_64 597 ns 688 ns BM_StableSort_uint64_QuickSortAdversary_256 2446 ns 2818 ns BM_StableSort_uint64_QuickSortAdversary_1024 7266 ns 20319 ns BM_StableSort_uint64_QuickSortAdversary_4096 31155 ns 89112 ns BM_StableSort_uint64_QuickSortAdversary_16384 201033 ns 390574 ns BM_StableSort_uint64_QuickSortAdversary_65536 871014 ns 1685639 ns BM_StableSort_uint64_QuickSortAdversary_262144 3978535 ns 7265830 ns BM_StableSort_uint64_QuickSortAdversary_524288 10279721 ns 25350004 ns BM_StableSort_uint64_QuickSortAdversary_1048576 20256585 ns 33054393 ns ``` --- libcxx/docs/ReleaseNotes/19.rst | 2 + libcxx/include/CMakeLists.txt | 1 + libcxx/include/__algorithm/radix_sort.h | 332 ++++++++++++++++++ libcxx/include/__algorithm/stable_sort.h | 44 +++ libcxx/include/__bit/bit_log2.h | 11 +- libcxx/include/module.modulemap | 1 + .../alg.sort/stable.sort/stable_sort.pass.cpp | 117 +++--- 7 files changed, 449 insertions(+), 59 deletions(-) create mode 100644 libcxx/include/__algorithm/radix_sort.h diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst index e8f76773c5437..aec7865d52fad 100644 --- a/libcxx/docs/ReleaseNotes/19.rst +++ b/libcxx/docs/ReleaseNotes/19.rst @@ -126,6 +126,8 @@ Improvements and New Features - In C++23 and C++26 the number of transitive includes in several headers has been reduced, improving the compilation speed. +- ``std::stable_sort`` uses radix sort for integral types now, which can improve the performance up to 10 times, depending + on type of sorted elements and the initial state of the sorted array. Deprecations and Removals ------------------------- diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 0b484ebe5e87c..96071527b5283 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -73,6 +73,7 @@ set(files __algorithm/prev_permutation.h __algorithm/pstl.h __algorithm/push_heap.h + __algorithm/radix_sort.h __algorithm/ranges_adjacent_find.h __algorithm/ranges_all_of.h __algorithm/ranges_any_of.h diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h new file mode 100644 index 0000000000000..c39b26a8620f4 --- /dev/null +++ b/libcxx/include/__algorithm/radix_sort.h @@ -0,0 +1,332 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RADIX_SORT_H +#define _LIBCPP___ALGORITHM_RADIX_SORT_H + +// This is an implementation of classic LSD radix sort algorithm, running in linear time and using `O(max(N, M))` +// additional memory, where `N` is size of an input range, `M` - maximum value of +// a radix of the sorted integer type. Type of the radix and its maximum value are determined at compile time +// based on type returned by function `__radix`. The default radix is uint8. + +// The algorithm is equivalent to several consecutive calls of counting sort for each +// radix of the sorted numbers from low to high byte. +// The algorithm uses a temporary buffer of size equal to size of the input range. Each `i`-th pass +// of the algorithm sorts values by `i`-th radix and moves values to the temporary buffer (for each even `i`, counted +// from zero), or moves them back to the initial range (for each odd `i`). If there is only one radix in sorted integers +// (e.g. int8), the sorted values are placed to the buffer, and then moved back to the initial range. + +// The implementation also has several optimizations: +// - the counters for the counting sort are calculated in one pass for all radices; +// - if all values of a radix are the same, we do not sort that radix, and just move items to the buffer; +// - if two consecutive radices satisfies condition above, we do nothing for these two radices. + +#include <__algorithm/for_each.h> +#include <__algorithm/move.h> +#include <__bit/bit_log2.h> +#include <__bit/countl.h> +#include <__config> +#include <__functional/identity.h> +#include <__iterator/distance.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/move_iterator.h> +#include <__iterator/next.h> +#include <__iterator/reverse_iterator.h> +#include <__numeric/partial_sum.h> +#include <__type_traits/decay.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_assignable.h> +#include <__type_traits/is_integral.h> +#include <__type_traits/is_unsigned.h> +#include <__type_traits/make_unsigned.h> +#include <__utility/forward.h> +#include <__utility/integer_sequence.h> +#include <__utility/move.h> +#include <__utility/pair.h> +#include +#include +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 14 + +template +_LIBCPP_HIDE_FROM_ABI pair<_OutputIterator, __iter_value_type<_InputIterator>> +__partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { + if (__first == __last) + return {__result, 0}; + + auto __max = *__first; + __iter_value_type<_InputIterator> __sum = *__first; + *__result = __sum; + + while (++__first != __last) { + if (__max < *__first) { + __max = *__first; + } + __sum = std::move(__sum) + *__first; + *++__result = __sum; + } + return {++__result, __max}; +} + +template +struct __radix_sort_traits { + using __image_type = decay_t::type>; + static_assert(is_unsigned<__image_type>::value); + + using __radix_type = decay_t::type>; + static_assert(is_integral<__radix_type>::value); + + static constexpr auto __radix_value_range = numeric_limits<__radix_type>::max() + 1; + static constexpr auto __radix_size = std::__bit_log2(__radix_value_range); + static constexpr auto __radix_count = sizeof(__image_type) * CHAR_BIT / __radix_size; +}; + +template +struct __counting_sort_traits { + using __image_type = decay_t::type>; + static_assert(is_unsigned<__image_type>::value); + + static constexpr const auto __value_range = numeric_limits<__image_type>::max() + 1; + static constexpr auto __radix_size = std::__bit_log2(__value_range); +}; + +template +_LIBCPP_HIDE_FROM_ABI auto __nth_radix(size_t __radix_number, _Radix __radix, _Integer __n) { + static_assert(is_unsigned<_Integer>::value); + using __traits = __counting_sort_traits<_Integer, _Radix>; + + return __radix(static_cast<_Integer>(__n >> __traits::__radix_size * __radix_number)); +} + +template +_LIBCPP_HIDE_FROM_ABI void +__collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) { + using __value_type = __iter_value_type<_ForwardIterator>; + using __traits = __counting_sort_traits<__value_type, _Map>; + + std::for_each(__first, __last, [&__counters, &__map](const auto& __preimage) { ++__counters[__map(__preimage)]; }); + + const auto __counters_end = __counters + __traits::__value_range; + std::partial_sum(__counters, __counters_end, __counters); +} + +template +_LIBCPP_HIDE_FROM_ABI void +__dispose(_ForwardIterator __first, + _ForwardIterator __last, + _RandomAccessIterator1 __result, + _Map __map, + _RandomAccessIterator2 __counters) { + std::for_each(__first, __last, [&__result, &__counters, &__map](auto&& __preimage) { + auto __index = __counters[__map(__preimage)]++; + __result[__index] = std::move(__preimage); + }); +} + +template +_LIBCPP_HIDE_FROM_ABI bool __collect_impl( + _ForwardIterator __first, + _ForwardIterator __last, + _Map __map, + _Radix __radix, + _RandomAccessIterator1 __counters, + _RandomAccessIterator2 __maximums, + index_sequence<_Radices...>) { + using __value_type = __iter_value_type<_ForwardIterator>; + constexpr auto __radix_value_range = __radix_sort_traits<__value_type, _Map, _Radix>::__radix_value_range; + + auto __previous = numeric_limits::type>::min(); + auto __is_sorted = true; + std::for_each(__first, __last, [&__counters, &__map, &__radix, &__previous, &__is_sorted](const auto& __value) { + auto __current = __map(__value); + __is_sorted &= (__current >= __previous); + __previous = __current; + + (++__counters[_Radices][std::__nth_radix(_Radices, __radix, __current)], ...); + }); + + ((__maximums[_Radices] = + std::__partial_sum_max(__counters[_Radices], __counters[_Radices] + __radix_value_range, __counters[_Radices]) + .second), + ...); + + return __is_sorted; +} + +template +_LIBCPP_HIDE_FROM_ABI bool +__collect(_ForwardIterator __first, + _ForwardIterator __last, + _Map __map, + _Radix __radix, + _RandomAccessIterator1 __counters, + _RandomAccessIterator2 __maximums) { + using __value_type = __iter_value_type<_ForwardIterator>; + constexpr auto __radix_count = __radix_sort_traits<__value_type, _Map, _Radix>::__radix_count; + return std::__collect_impl( + __first, __last, __map, __radix, __counters, __maximums, make_index_sequence<__radix_count>()); +} + +template +_LIBCPP_HIDE_FROM_ABI void __dispose_backward( + _BidirectionalIterator __first, + _BidirectionalIterator __last, + _RandomAccessIterator1 __result, + _Map __map, + _RandomAccessIterator2 __counters) { + std::for_each(std::make_reverse_iterator(__last), + std::make_reverse_iterator(__first), + [&__result, &__counters, &__map](auto&& __preimage) { + auto __index = --__counters[__map(__preimage)]; + __result[__index] = std::move(__preimage); + }); +} + +template +_LIBCPP_HIDE_FROM_ABI _RandomAccessIterator +__counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __result, _Map __map) { + using __value_type = __iter_value_type<_ForwardIterator>; + using __traits = __counting_sort_traits<__value_type, _Map>; + + __iter_diff_t<_RandomAccessIterator> __counters[__traits::__value_range + 1] = {0}; + + std::__collect(__first, __last, __map, std::next(std::begin(__counters))); + std::__dispose(__first, __last, __result, __map, std::begin(__counters)); + + return __result + __counters[__traits::__value_range]; +} + +template , _Map, _Radix>::__radix_count == 1, + int> = 0> +_LIBCPP_HIDE_FROM_ABI void __radix_sort_impl( + _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, + _RandomAccessIterator2 __buffer, + _Map __map, + _Radix __radix) { + auto __buffer_end = std::__counting_sort_impl(__first, __last, __buffer, [&__map, &__radix](const auto& __value) { + return __radix(__map(__value)); + }); + + std::move(__buffer, __buffer_end, __first); +} + +template < + class _RandomAccessIterator1, + class _RandomAccessIterator2, + class _Map, + class _Radix, + enable_if_t< __radix_sort_traits<__iter_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count % 2 == 0, + int> = 0 > +_LIBCPP_HIDE_FROM_ABI void __radix_sort_impl( + _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, + _RandomAccessIterator2 __buffer_begin, + _Map __map, + _Radix __radix) { + using __value_type = __iter_value_type<_RandomAccessIterator1>; + using __traits = __radix_sort_traits<__value_type, _Map, _Radix>; + + __iter_diff_t<_RandomAccessIterator1> __counters[__traits::__radix_count][__traits::__radix_value_range] = {{0}}; + __iter_diff_t<_RandomAccessIterator1> __maximums[__traits::__radix_count] = {0}; + const auto __is_sorted = std::__collect(__first, __last, __map, __radix, __counters, __maximums); + if (!__is_sorted) { + const auto __range_size = std::distance(__first, __last); + auto __buffer_end = __buffer_begin + __range_size; + for (size_t __radix_number = 0; __radix_number < __traits::__radix_count; __radix_number += 2) { + const auto __n0th_is_single = __maximums[__radix_number] == __range_size; + const auto __n1th_is_single = __maximums[__radix_number + 1] == __range_size; + + if (__n0th_is_single && __n1th_is_single) { + continue; + } + + if (__n0th_is_single) { + std::move(__first, __last, __buffer_begin); + } else { + auto __n0th = [__radix_number, &__map, &__radix](const auto& __v) { + return std::__nth_radix(__radix_number, __radix, __map(__v)); + }; + std::__dispose_backward(__first, __last, __buffer_begin, __n0th, __counters[__radix_number]); + } + + if (__n1th_is_single) { + std::move(__buffer_begin, __buffer_end, __first); + } else { + auto __n1th = [__radix_number, &__map, &__radix](const auto& __v) { + return std::__nth_radix(__radix_number + 1, __radix, __map(__v)); + }; + std::__dispose_backward(__buffer_begin, __buffer_end, __first, __n1th, __counters[__radix_number + 1]); + } + } + } +} + +_LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(bool __b) { return __b; } + +template +_LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(_Ip __n) { + constexpr const auto __min_value = numeric_limits<_Ip>::min(); + return static_cast >(__n ^ __min_value); +} + +struct __low_byte_fn { + template + _LIBCPP_HIDE_FROM_ABI constexpr uint8_t operator()(_Ip __integer) const { + static_assert(is_unsigned<_Ip>::value); + + return static_cast(__integer & 0xff); + } +}; + +template +_LIBCPP_HIDE_FROM_ABI void +__radix_sort(_RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, + _RandomAccessIterator2 __buffer, + _Map __map, + _Radix __radix) { + auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__shift_to_unsigned(__map(__x)); }; + std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix); +} + +template +_LIBCPP_HIDE_FROM_ABI void +__radix_sort(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer) { + std::__radix_sort(__first, __last, __buffer, __identity{}, __low_byte_fn{}); +} + +#endif // _LIBCPP_STD_VER >= 14 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ALGORITHM_RADIX_SORT_H diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h index 1111f5509bc38..70a85023a17f0 100644 --- a/libcxx/include/__algorithm/stable_sort.h +++ b/libcxx/include/__algorithm/stable_sort.h @@ -13,6 +13,7 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/inplace_merge.h> #include <__algorithm/iterator_operations.h> +#include <__algorithm/radix_sort.h> #include <__algorithm/sort.h> #include <__config> #include <__cstddef/ptrdiff_t.h> @@ -21,7 +22,12 @@ #include <__memory/destruct_n.h> #include <__memory/unique_ptr.h> #include <__memory/unique_temporary_buffer.h> +#include <__type_traits/desugars_to.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/is_integral.h> +#include <__type_traits/is_same.h> #include <__type_traits/is_trivially_assignable.h> +#include <__type_traits/remove_cvref.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -189,6 +195,28 @@ struct __stable_sort_switch { static const unsigned value = 128 * is_trivially_copy_assignable<_Tp>::value; }; +#if _LIBCPP_STD_VER >= 17 +template +_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() { + static_assert(is_integral<_Tp>::value); + if constexpr (sizeof(_Tp) == 1) { + return 1 << 8; + } + + return 1 << 10; +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_max_bound() { + static_assert(is_integral<_Tp>::value); + if constexpr (sizeof(_Tp) >= 8) { + return 1 << 15; + } + + return 1 << 16; +} +#endif // _LIBCPP_STD_VER >= 17 + template void __stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, @@ -211,6 +239,22 @@ void __stable_sort(_RandomAccessIterator __first, std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp); return; } + +#if _LIBCPP_STD_VER >= 17 + constexpr auto __default_comp = + __desugars_to_v<__totally_ordered_less_tag, __remove_cvref_t<_Compare>, value_type, value_type >; + constexpr auto __integral_value = + is_integral_v && is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>; + constexpr auto __allowed_radix_sort = __default_comp && __integral_value; + if constexpr (__allowed_radix_sort) { + if (__len <= __buff_size && __len >= static_cast(__radix_sort_min_bound()) && + __len <= static_cast(__radix_sort_max_bound())) { + std::__radix_sort(__first, __last, __buff); + return; + } + } +#endif // _LIBCPP_STD_VER >= 17 + typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2; _RandomAccessIterator __m = __first + __l2; if (__len <= __buff_size) { diff --git a/libcxx/include/__bit/bit_log2.h b/libcxx/include/__bit/bit_log2.h index 62936f6786860..94ee6c3b2bb1d 100644 --- a/libcxx/include/__bit/bit_log2.h +++ b/libcxx/include/__bit/bit_log2.h @@ -10,8 +10,8 @@ #define _LIBCPP___BIT_BIT_LOG2_H #include <__bit/countl.h> -#include <__concepts/arithmetic.h> #include <__config> +#include <__type_traits/is_unsigned_integer.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -20,14 +20,15 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 20 +#if _LIBCPP_STD_VER >= 14 -template <__libcpp_unsigned_integer _Tp> +template _LIBCPP_HIDE_FROM_ABI constexpr _Tp __bit_log2(_Tp __t) noexcept { - return numeric_limits<_Tp>::digits - 1 - std::countl_zero(__t); + static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__bit_log2 requires an unsigned integer type"); + return numeric_limits<_Tp>::digits - 1 - std::__countl_zero(__t); } -#endif // _LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 14 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 86efbd36b20d1..70aee7da79cba 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -488,6 +488,7 @@ module std [system] { module prev_permutation { header "__algorithm/prev_permutation.h" } module pstl { header "__algorithm/pstl.h" } module push_heap { header "__algorithm/push_heap.h" } + module radix_sort { header "__algorithm/radix_sort.h" } module ranges_adjacent_find { header "__algorithm/ranges_adjacent_find.h" } module ranges_all_of { header "__algorithm/ranges_all_of.h" } module ranges_any_of { header "__algorithm/ranges_any_of.h" } diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp index 4301d22027de8..621234354092d 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp @@ -50,16 +50,16 @@ template void test_sort_driver_driver(RI f, RI l, int start, RI real_last) { - for (RI i = l; i > f + start;) - { - *--i = start; - if (f == i) - { - test_sort_helper(f, real_last); - } + using value_type = typename std::iterator_traits::value_type; + + for (RI i = l; i > f + start;) { + *--i = static_cast(start); + if (f == i) { + test_sort_helper(f, real_last); + } if (start > 0) test_sort_driver_driver(f, i, start-1, real_last); - } + } } template @@ -69,55 +69,61 @@ test_sort_driver(RI f, RI l, int start) test_sort_driver_driver(f, l, start, l); } +template +void test_sort_() { + V ia[sa]; + for (int i = 0; i < sa; ++i) { + test_sort_driver(ia, ia + sa, i); + } +} + template -void -test_sort_() -{ - int ia[sa]; - for (int i = 0; i < sa; ++i) - { - test_sort_driver(ia, ia+sa, i); - } +void test_sort_() { + test_sort_(); + test_sort_(); } -void -test_larger_sorts(int N, int M) -{ - assert(N != 0); - assert(M != 0); - // create array length N filled with M different numbers - int* array = new int[N]; - int x = 0; - for (int i = 0; i < N; ++i) - { - array[i] = x; - if (++x == M) - x = 0; - } - // test saw tooth pattern - std::stable_sort(array, array+N); - assert(std::is_sorted(array, array+N)); - // test random pattern - std::shuffle(array, array+N, randomness); - std::stable_sort(array, array+N); - assert(std::is_sorted(array, array+N)); - // test sorted pattern - std::stable_sort(array, array+N); - assert(std::is_sorted(array, array+N)); - // test reverse sorted pattern - std::reverse(array, array+N); - std::stable_sort(array, array+N); - assert(std::is_sorted(array, array+N)); - // test swap ranges 2 pattern - std::swap_ranges(array, array+N/2, array+N/2); - std::stable_sort(array, array+N); - assert(std::is_sorted(array, array+N)); - // test reverse swap ranges 2 pattern - std::reverse(array, array+N); - std::swap_ranges(array, array+N/2, array+N/2); - std::stable_sort(array, array+N); - assert(std::is_sorted(array, array+N)); - delete [] array; +template +void test_larger_sorts(int N, int M) { + assert(N != 0); + assert(M != 0); + // create array length N filled with M different numbers + V* array = new V[N]; + int x = 0; + for (int i = 0; i < N; ++i) { + array[i] = static_cast(x); + if (++x == M) + x = 0; + } + // test saw tooth pattern + std::stable_sort(array, array + N); + assert(std::is_sorted(array, array + N)); + // test random pattern + std::shuffle(array, array + N, randomness); + std::stable_sort(array, array + N); + assert(std::is_sorted(array, array + N)); + // test sorted pattern + std::stable_sort(array, array + N); + assert(std::is_sorted(array, array + N)); + // test reverse sorted pattern + std::reverse(array, array + N); + std::stable_sort(array, array + N); + assert(std::is_sorted(array, array + N)); + // test swap ranges 2 pattern + std::swap_ranges(array, array + N / 2, array + N / 2); + std::stable_sort(array, array + N); + assert(std::is_sorted(array, array + N)); + // test reverse swap ranges 2 pattern + std::reverse(array, array + N); + std::swap_ranges(array, array + N / 2, array + N / 2); + std::stable_sort(array, array + N); + assert(std::is_sorted(array, array + N)); + delete[] array; +} + +void test_larger_sorts(int N, int M) { + test_larger_sorts(N, M); + test_larger_sorts(N, M); } void @@ -156,6 +162,9 @@ int main(int, char**) test_larger_sorts(997); test_larger_sorts(1000); test_larger_sorts(1009); + test_larger_sorts(1024); + test_larger_sorts(1031); + test_larger_sorts(2053); #if !defined(TEST_HAS_NO_EXCEPTIONS) { // check that the algorithm works without memory From 0e1c5bfac8574bc8419968279069c76b55e5f270 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 9 Jan 2025 18:02:43 +0000 Subject: [PATCH 20/79] [gn build] Port 69b54c1a05c0 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index d4f0a2924ab4d..1144402befa4d 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -145,6 +145,7 @@ if (current_toolchain == default_toolchain) { "__algorithm/prev_permutation.h", "__algorithm/pstl.h", "__algorithm/push_heap.h", + "__algorithm/radix_sort.h", "__algorithm/ranges_adjacent_find.h", "__algorithm/ranges_all_of.h", "__algorithm/ranges_any_of.h", From a6b7181733c83523a39d4f4e788c6b7a227d477d Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Thu, 9 Jan 2025 13:11:52 -0500 Subject: [PATCH 21/79] [HLSL] Move length support out of the DirectX Backend (#121611) ## Changes - Delete DirectX length intrinsic - Delete HLSL length lang builtin - Implement length algorithm entirely in the header. ## History - In the past if an HLSL intrinsic lowered to either a spirv op code or a DXIL opcode we represented it with intrinsics ## Why we are moving away? - To make HLSL apis more portable the team decided that it makes sense for some intrinsics to be defined only in the header. - Since there tends to be more SPIRV opcodes than DXIL opcodes the plan is to support SPIRV opcodes either with target specific builtins or via pattern matching. --- clang/include/clang/Basic/Builtins.td | 6 - clang/lib/CodeGen/CGBuiltin.cpp | 14 -- clang/lib/CodeGen/CGHLSLRuntime.h | 1 - clang/lib/Headers/hlsl/hlsl_detail.h | 23 ++ clang/lib/Headers/hlsl/hlsl_intrinsics.h | 29 +-- clang/lib/Sema/SemaHLSL.cpp | 18 -- clang/test/CodeGenHLSL/builtins/length.hlsl | 203 +++++++++++------- .../test/SemaHLSL/BuiltIns/length-errors.hlsl | 51 +++-- llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 - .../Target/DirectX/DXILIntrinsicExpansion.cpp | 30 --- llvm/test/CodeGen/DirectX/length.ll | 116 ---------- llvm/test/CodeGen/DirectX/length_error.ll | 10 - .../DirectX/length_invalid_intrinsic_error.ll | 10 - .../length_invalid_intrinsic_error_scalar.ll | 10 - 14 files changed, 198 insertions(+), 324 deletions(-) delete mode 100644 llvm/test/CodeGen/DirectX/length.ll delete mode 100644 llvm/test/CodeGen/DirectX/length_error.ll delete mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll delete mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 468c16050e2bf..f4216bd01a074 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4865,12 +4865,6 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } -def HLSLLength : LangBuiltin<"HLSL_LANG"> { - let Spellings = ["__builtin_hlsl_length"]; - let Attributes = [NoThrow, Const]; - let Prototype = "void(...)"; -} - def HLSLLerp : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_lerp"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ca03fb665d423..2b09197669996 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19334,20 +19334,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(), ArrayRef{X, Y, S}, nullptr, "hlsl.lerp"); } - case Builtin::BI__builtin_hlsl_length: { - Value *X = EmitScalarExpr(E->getArg(0)); - - assert(E->getArg(0)->getType()->hasFloatingRepresentation() && - "length operand must have a float representation"); - // if the operand is a scalar, we can use the fabs llvm intrinsic directly - if (!E->getArg(0)->getType()->isVectorType()) - return EmitFAbs(*this, X); - - return Builder.CreateIntrinsic( - /*ReturnType=*/X->getType()->getScalarType(), - CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef{X}, - nullptr, "hlsl.length"); - } case Builtin::BI__builtin_hlsl_normalize: { Value *X = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 46e472f0aae21..00e110e8e6fa2 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -77,7 +77,6 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross) GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees) GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac) - GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length) GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp) GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize) GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt) diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index 8d5fd94133153..392075d276b18 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -13,6 +13,14 @@ namespace hlsl { namespace __detail { +template struct is_same { + static const bool value = false; +}; + +template struct is_same { + static const bool value = true; +}; + template struct enable_if {}; template struct enable_if { @@ -33,6 +41,21 @@ constexpr enable_if_t bit_cast(T F) { return __builtin_bit_cast(U, F); } +template +constexpr enable_if_t::value || is_same::value, T> +length_impl(T X) { + return __builtin_elementwise_abs(X); +} + +template +enable_if_t::value || is_same::value, T> +length_vec_impl(vector X) { + vector XSquared = X * X; + T XSquaredSum = XSquared[0]; + [unroll] for (int i = 1; i < N; ++i) XSquaredSum += XSquared[i]; + return __builtin_elementwise_sqrt(XSquaredSum); +} + } // namespace __detail } // namespace hlsl #endif //_HLSL_HLSL_DETAILS_H_ diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index b745997f1d5a2..cf287e598f76b 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -1297,27 +1297,16 @@ float4 lerp(float4, float4, float4); /// /// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...). -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) -half length(half); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) -half length(half2); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) -half length(half3); -_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) -half length(half4); +const inline half length(half X) { return __detail::length_impl(X); } +const inline float length(float X) { return __detail::length_impl(X); } -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) -float length(float); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) -float length(float2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) -float length(float3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) -float length(float4); +template const inline half length(vector X) { + return __detail::length_vec_impl(X); +} + +template const inline float length(vector X) { + return __detail::length_vec_impl(X); +} //===----------------------------------------------------------------------===// // log builtins diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 600c800029fd0..64b6fe4cd5eb4 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2100,24 +2100,6 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } - case Builtin::BI__builtin_hlsl_length: { - if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall)) - return true; - if (SemaRef.checkArgCount(TheCall, 1)) - return true; - - ExprResult A = TheCall->getArg(0); - QualType ArgTyA = A.get()->getType(); - QualType RetTy; - - if (auto *VTy = ArgTyA->getAs()) - RetTy = VTy->getElementType(); - else - RetTy = TheCall->getArg(0)->getType(); - - TheCall->setType(RetTy); - break; - } case Builtin::BI__builtin_hlsl_mad: { if (SemaRef.checkArgCount(TheCall, 3)) return true; diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl index a24f01d275440..ecf2edcf0b05f 100644 --- a/clang/test/CodeGenHLSL/builtins/length.hlsl +++ b/clang/test/CodeGenHLSL/builtins/length.hlsl @@ -1,73 +1,130 @@ -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,NATIVE_HALF -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF - -// NATIVE_HALF: define noundef nofpclass(nan inf) half @ -// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.fabs.f16(half -// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float -// NATIVE_HALF: ret half -// NO_HALF: ret float -half test_length_half(half p0) -{ - return length(p0); -} -// NATIVE_HALF: define noundef nofpclass(nan inf) half @ -// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v2f16 -// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32( -// NATIVE_HALF: ret half %hlsl.length -// NO_HALF: ret float %hlsl.length -half test_length_half2(half2 p0) -{ - return length(p0); -} -// NATIVE_HALF: define noundef nofpclass(nan inf) half @ -// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v3f16 -// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32( -// NATIVE_HALF: ret half %hlsl.length -// NO_HALF: ret float %hlsl.length -half test_length_half3(half3 p0) -{ - return length(p0); -} -// NATIVE_HALF: define noundef nofpclass(nan inf) half @ -// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v4f16 -// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32( -// NATIVE_HALF: ret half %hlsl.length -// NO_HALF: ret float %hlsl.length -half test_length_half4(half4 p0) -{ - return length(p0); -} - -// CHECK: define noundef nofpclass(nan inf) float @ -// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float -// CHECK: ret float -float test_length_float(float p0) -{ - return length(p0); -} -// CHECK: define noundef nofpclass(nan inf) float @ -// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32( -// CHECK: ret float %hlsl.length -float test_length_float2(float2 p0) -{ - return length(p0); -} -// CHECK: define noundef nofpclass(nan inf) float @ -// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32( -// CHECK: ret float %hlsl.length -float test_length_float3(float3 p0) -{ - return length(p0); -} -// CHECK: define noundef nofpclass(nan inf) float @ -// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32( -// CHECK: ret float %hlsl.length -float test_length_float4(float4 p0) -{ - return length(p0); -} +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -O1 -o - | FileCheck %s + +// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z16test_length_halfDh( +// CHECK-SAME: half noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.fabs.f16(half [[P0]]) +// CHECK-NEXT: ret half [[ELT_ABS_I]] +// +half test_length_half(half p0) +{ + return length(p0); +} + +// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh( +// CHECK-SAME: <2 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x half> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT_I]], [[VECEXT1_I]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I]]) +// CHECK-NEXT: ret half [[TMP0]] +// +half test_length_half2(half2 p0) +{ + return length(p0); +} + +// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh( +// CHECK-SAME: <3 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x half> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]] +// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2 +// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_1]]) +// CHECK-NEXT: ret half [[TMP0]] +// +half test_length_half3(half3 p0) +{ + return length(p0); +} + +// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh( +// CHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x half> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]] +// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2 +// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]] +// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3 +// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_2]], [[ADD_I_1]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_2]]) +// CHECK-NEXT: ret half [[TMP0]] +// +half test_length_half4(half4 p0) +{ + return length(p0); +} + + +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z17test_length_floatf( +// CHECK-SAME: float noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.fabs.f32(float [[P0]]) +// CHECK-NEXT: ret float [[ELT_ABS_I]] +// +float test_length_float(float p0) +{ + return length(p0); +} + +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f( +// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x float> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT_I]], [[VECEXT1_I]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I]]) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_length_float2(float2 p0) +{ + return length(p0); +} + +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f( +// CHECK-SAME: <3 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x float> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]] +// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2 +// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_1]]) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_length_float3(float3 p0) +{ + return length(p0); +} + + +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f( +// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x float> [[P0]], [[P0]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1 +// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]] +// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2 +// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]] +// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3 +// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_2]], [[ADD_I_1]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_2]]) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_length_float4(float4 p0) +{ + return length(p0); +} diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl index 281faada6f5e9..a191f0419fbba 100644 --- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl @@ -1,32 +1,53 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected - +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify void test_too_few_arg() { - return __builtin_hlsl_length(); - // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + return length(); + // expected-error@-1 {{no matching function for call to 'length'}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}} } void test_too_many_arg(float2 p0) { - return __builtin_hlsl_length(p0, p0); - // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + return length(p0, p0); + // expected-error@-1 {{no matching function for call to 'length'}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}} +} + +float double_to_float_type(double p0) { + return length(p0); + // expected-error@-1 {{call to 'length' is ambiguous}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} } -bool builtin_bool_to_float_type_promotion(bool p1) + +float bool_to_float_type_promotion(bool p1) { - return __builtin_hlsl_length(p1); - // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}} + return length(p1); + // expected-error@-1 {{call to 'length' is ambiguous}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} } -bool builtin_length_int_to_float_promotion(int p1) +float length_int_to_float_promotion(int p1) { - return __builtin_hlsl_length(p1); - // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}} + return length(p1); + // expected-error@-1 {{call to 'length' is ambiguous}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} } -bool2 builtin_length_int2_to_float2_promotion(int2 p1) +float2 length_int2_to_float2_promotion(int2 p1) { - return __builtin_hlsl_length(p1); - // expected-error@-1 {{passing 'int2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}} + return length(p1); + // expected-error@-1 {{call to 'length' is ambiguous}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} } diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 3b1d1a88e01a8..ef48af5b42dbf 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -93,7 +93,6 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1 def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], [IntrNoMem]>; -def int_dx_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 51dc3025f0c37..cf142806bb1df 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -57,7 +57,6 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::dx_nclamp: case Intrinsic::dx_degrees: case Intrinsic::dx_lerp: - case Intrinsic::dx_length: case Intrinsic::dx_normalize: case Intrinsic::dx_fdot: case Intrinsic::dx_sdot: @@ -292,32 +291,6 @@ static Value *expandAnyOrAllIntrinsic(CallInst *Orig, return Result; } -static Value *expandLengthIntrinsic(CallInst *Orig) { - Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig); - Type *Ty = X->getType(); - Type *EltTy = Ty->getScalarType(); - - // Though dx.length does work on scalar type, we can optimize it to just emit - // fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because - // CGBuiltin.cpp should have emitted a fabs call. - Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0); - auto *XVec = dyn_cast(Ty); - unsigned XVecSize = XVec->getNumElements(); - if (!(Ty->isVectorTy() && XVecSize > 1)) - report_fatal_error(Twine("Invalid input type for length intrinsic"), - /* gen_crash_diag=*/false); - - Value *Sum = Builder.CreateFMul(Elt, Elt); - for (unsigned I = 1; I < XVecSize; I++) { - Elt = Builder.CreateExtractElement(X, I); - Value *Mul = Builder.CreateFMul(Elt, Elt); - Sum = Builder.CreateFAdd(Sum, Mul); - } - return Builder.CreateIntrinsic(EltTy, Intrinsic::sqrt, ArrayRef{Sum}, - nullptr, "elt.sqrt"); -} - static Value *expandLerpIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Value *Y = Orig->getOperand(1); @@ -589,9 +562,6 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::dx_lerp: Result = expandLerpIntrinsic(Orig); break; - case Intrinsic::dx_length: - Result = expandLengthIntrinsic(Orig); - break; case Intrinsic::dx_normalize: Result = expandNormalizeIntrinsic(Orig); break; diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll deleted file mode 100644 index fc5868a7f6e82..0000000000000 --- a/llvm/test/CodeGen/DirectX/length.ll +++ /dev/null @@ -1,116 +0,0 @@ -; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK -; RUN: opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK - -; Make sure dxil operation function calls for length are generated for half/float. - -declare half @llvm.fabs.f16(half) -declare half @llvm.dx.length.v2f16(<2 x half>) -declare half @llvm.dx.length.v3f16(<3 x half>) -declare half @llvm.dx.length.v4f16(<4 x half>) - -declare float @llvm.fabs.f32(float) -declare float @llvm.dx.length.v2f32(<2 x float>) -declare float @llvm.dx.length.v3f32(<3 x float>) -declare float @llvm.dx.length.v4f32(<4 x float>) - -define noundef half @test_length_half2(<2 x half> noundef %p0) { -entry: - ; CHECK: extractelement <2 x half> %{{.*}}, i64 0 - ; CHECK: fmul half %{{.*}}, %{{.*}} - ; CHECK: extractelement <2 x half> %{{.*}}, i64 1 - ; CHECK: fmul half %{{.*}}, %{{.*}} - ; CHECK: fadd half %{{.*}}, %{{.*}} - ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}}) - ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}}) - - %hlsl.length = call half @llvm.dx.length.v2f16(<2 x half> %p0) - ret half %hlsl.length -} - -define noundef half @test_length_half3(<3 x half> noundef %p0) { -entry: - ; CHECK: extractelement <3 x half> %{{.*}}, i64 0 - ; CHECK: fmul half %{{.*}}, %{{.*}} - ; CHECK: extractelement <3 x half> %{{.*}}, i64 1 - ; CHECK: fmul half %{{.*}}, %{{.*}} - ; CHECK: fadd half %{{.*}}, %{{.*}} - ; CHECK: extractelement <3 x half> %{{.*}}, i64 2 - ; CHECK: fmul half %{{.*}}, %{{.*}} - ; CHECK: fadd half %{{.*}}, %{{.*}} - ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}}) - ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}}) - - %hlsl.length = call half @llvm.dx.length.v3f16(<3 x half> %p0) - ret half %hlsl.length -} - -define noundef half @test_length_half4(<4 x half> noundef %p0) { -entry: - ; CHECK: extractelement <4 x half> %{{.*}}, i64 0 - ; CHECK: fmul half %{{.*}}, %{{.*}} - ; CHECK: extractelement <4 x half> %{{.*}}, i64 1 - ; CHECK: fmul half %{{.*}}, %{{.*}} - ; CHECK: fadd half %{{.*}}, %{{.*}} - ; CHECK: extractelement <4 x half> %{{.*}}, i64 2 - ; CHECK: fmul half %{{.*}}, %{{.*}} - ; CHECK: fadd half %{{.*}}, %{{.*}} - ; CHECK: extractelement <4 x half> %{{.*}}, i64 3 - ; CHECK: fmul half %{{.*}}, %{{.*}} - ; CHECK: fadd half %{{.*}}, %{{.*}} - ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}}) - ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}}) - - %hlsl.length = call half @llvm.dx.length.v4f16(<4 x half> %p0) - ret half %hlsl.length -} - -define noundef float @test_length_float2(<2 x float> noundef %p0) { -entry: - ; CHECK: extractelement <2 x float> %{{.*}}, i64 0 - ; CHECK: fmul float %{{.*}}, %{{.*}} - ; CHECK: extractelement <2 x float> %{{.*}}, i64 1 - ; CHECK: fmul float %{{.*}}, %{{.*}} - ; CHECK: fadd float %{{.*}}, %{{.*}} - ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) - ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) - - %hlsl.length = call float @llvm.dx.length.v2f32(<2 x float> %p0) - ret float %hlsl.length -} - -define noundef float @test_length_float3(<3 x float> noundef %p0) { -entry: - ; CHECK: extractelement <3 x float> %{{.*}}, i64 0 - ; CHECK: fmul float %{{.*}}, %{{.*}} - ; CHECK: extractelement <3 x float> %{{.*}}, i64 1 - ; CHECK: fmul float %{{.*}}, %{{.*}} - ; CHECK: fadd float %{{.*}}, %{{.*}} - ; CHECK: extractelement <3 x float> %{{.*}}, i64 2 - ; CHECK: fmul float %{{.*}}, %{{.*}} - ; CHECK: fadd float %{{.*}}, %{{.*}} - ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) - ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) - - %hlsl.length = call float @llvm.dx.length.v3f32(<3 x float> %p0) - ret float %hlsl.length -} - -define noundef float @test_length_float4(<4 x float> noundef %p0) { -entry: - ; CHECK: extractelement <4 x float> %{{.*}}, i64 0 - ; CHECK: fmul float %{{.*}}, %{{.*}} - ; CHECK: extractelement <4 x float> %{{.*}}, i64 1 - ; CHECK: fmul float %{{.*}}, %{{.*}} - ; CHECK: fadd float %{{.*}}, %{{.*}} - ; CHECK: extractelement <4 x float> %{{.*}}, i64 2 - ; CHECK: fmul float %{{.*}}, %{{.*}} - ; CHECK: fadd float %{{.*}}, %{{.*}} - ; CHECK: extractelement <4 x float> %{{.*}}, i64 3 - ; CHECK: fmul float %{{.*}}, %{{.*}} - ; CHECK: fadd float %{{.*}}, %{{.*}} - ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) - ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) - - %hlsl.length = call float @llvm.dx.length.v4f32(<4 x float> %p0) - ret float %hlsl.length -} diff --git a/llvm/test/CodeGen/DirectX/length_error.ll b/llvm/test/CodeGen/DirectX/length_error.ll deleted file mode 100644 index 143b41fc506e1..0000000000000 --- a/llvm/test/CodeGen/DirectX/length_error.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s - -; DXIL operation length does not support double overload type -; CHECK: Cannot create Sqrt operation: Invalid overload type - -define noundef double @test_length_double2(<2 x double> noundef %p0) { -entry: - %hlsl.length = call double @llvm.dx.length.v2f32(<2 x double> %p0) - ret double %hlsl.length -} diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll deleted file mode 100644 index f722de2f9029e..0000000000000 --- a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s - -; DXIL operation length does not support 1-element vector types. -; CHECK: LLVM ERROR: Invalid input type for length intrinsic - -define noundef float @test_length_float(<1 x float> noundef %p0) { -entry: - %hlsl.length = call float @llvm.dx.length.v1f32(<1 x float> %p0) - ret float %hlsl.length -} diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll deleted file mode 100644 index ac3a0513eb6b2..0000000000000 --- a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s - -; DXIL operation length does not support scalar types -; CHECK: error: invalid intrinsic signature - -define noundef float @test_length_float(float noundef %p0) { -entry: - %hlsl.length = call float @llvm.dx.length.f32(float %p0) - ret float %hlsl.length -} From b6cda338ab7bd18265a3e3fd494856620aec5210 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 9 Jan 2025 18:19:39 +0000 Subject: [PATCH 22/79] [Loads] Also consider getPointerAlignment when checking assumptions. (#120916) Also use getPointerAlignment when trying to use alignment and dereferenceable assumptions. This catches cases where dereferencable is known via the assumption but alignment is known via getPointerAlignment (e.g. via argument attribute or align of 1) PR: https://github.com/llvm/llvm-project/pull/120916 --- llvm/lib/Analysis/Loads.cpp | 6 ++- ...able-info-from-assumption-constant-size.ll | 50 +++---------------- 2 files changed, 12 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index bc03e4052a705..7bbd469bd035d 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -170,9 +170,10 @@ static bool isDereferenceableAndAlignedPointer( if (CtxI) { /// Look through assumes to see if both dereferencability and alignment can - /// be provent by an assume + /// be proven by an assume if needed. RetainedKnowledge AlignRK; RetainedKnowledge DerefRK; + bool IsAligned = V->getPointerAlignment(DL) >= Alignment; if (getKnowledgeForValue( V, {Attribute::Dereferenceable, Attribute::Alignment}, AC, [&](RetainedKnowledge RK, Instruction *Assume, auto) { @@ -182,7 +183,8 @@ static bool isDereferenceableAndAlignedPointer( AlignRK = std::max(AlignRK, RK); if (RK.AttrKind == Attribute::Dereferenceable) DerefRK = std::max(DerefRK, RK); - if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() && + IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value(); + if (IsAligned && DerefRK && DerefRK.ArgValue >= Size.getZExtValue()) return true; // We have found what we needed so we stop looking return false; // Other assumes may have better information. so diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index 1333451a0077e..815e6bce52c0a 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -104,8 +104,8 @@ exit: ret void } -define void @deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias %a, ptr noalias %b, ptr noalias %c) { -; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_loop_invariant_ptr( +define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; CHECK-LABEL: define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr( ; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4) ] @@ -899,32 +899,15 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noali ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 @@ -1168,32 +1151,15 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 From 6beaa4552686685e93e31dcb6275e18c4c17acd9 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 13:25:10 -0500 Subject: [PATCH 23/79] [RISCV][VLOPT] Fix VCFCT incompatible EEW test (#122327) https://github.com/llvm/llvm-project/pull/122151 added this test with an invalid SEW. Use a valid SEW here. --- llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir index 2359fae9389d0..389a7ff1fdbc3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir @@ -1423,9 +1423,9 @@ name: vfred_vs1_vs2_incompatible_eew body: | bb.0: ; CHECK-LABEL: name: vfred_vs1_vs2_incompatible_eew - ; CHECK: %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK: %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 6 /* e64 */, 0 /* tu, mu */ ; CHECK-NEXT: %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0 /* tu, mu */ - %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 3 /* e8 */, 0 + %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 6 /* e64 */, 0 %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0 ... --- From cbcb7ad32e6faca1f4c0f2f436e6076774104e17 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Thu, 9 Jan 2025 10:27:37 -0800 Subject: [PATCH 24/79] [mlir][acc] Introduce MappableType interface (#122146) OpenACC data clause operations previously required that the variable operand implemented PointerLikeType interface. This was a reasonable constraint because the dialects currently mixed with `acc` do use pointers to represent variables. However, this forces the "pointer" abstraction to be exposed too early and some cases are not cleanly representable through this approach (more specifically FIR's `fix.box` abstraction). Thus, relax this by allowing a variable to be a type which implements either `PointerLikeType` interface or `MappableType` interface. --- mlir/docs/Dialects/OpenACCDialect.md | 98 +++++- mlir/include/mlir/Dialect/OpenACC/OpenACC.h | 44 ++- .../mlir/Dialect/OpenACC/OpenACCOps.td | 284 +++++++++++------- .../Dialect/OpenACC/OpenACCTypeInterfaces.td | 93 ++++++ mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 220 +++++++++++++- .../Dialect/OpenACC/OpenACCOpsTest.cpp | 128 ++++++-- 6 files changed, 696 insertions(+), 171 deletions(-) diff --git a/mlir/docs/Dialects/OpenACCDialect.md b/mlir/docs/Dialects/OpenACCDialect.md index 2f1bb194a167d..9e85d66ba340f 100755 --- a/mlir/docs/Dialects/OpenACCDialect.md +++ b/mlir/docs/Dialects/OpenACCDialect.md @@ -274,28 +274,96 @@ reference counters are zero, a delete action is performed. ### Types -There are a few acc dialect type categories to describe: -* type of acc data clause operation input `varPtr` - - The type of `varPtr` must be pointer-like. This is done by - attaching the `PointerLikeType` interface to the appropriate MLIR - type. Although memory/storage concept is a lower level abstraction, - it is useful because the OpenACC model distinguishes between host - and device memory explicitly - and the mapping between the two is - done through pointers. Thus, by explicitly requiring it in the - dialect, the appropriate language frontend must create storage or - use type that satisfies the mapping constraint. +Since the `acc dialect` is meant to be used alongside other dialects which +represent the source language, appropriate use of types and type interfaces is +key to ensuring compatibility. This section describes those considerations. + +#### Data Clause Operation Types + +Data clause operations (eg. `acc.copyin`) rely on the following type +considerations: +* type of acc data clause operation input `var` + - The type of `var` must be one with `PointerLikeType` or `MappableType` + interfaces attached. The first, `PointerLikeType`, is useful because + the OpenACC memory model distinguishes between host and device memory + explicitly - and the mapping between the two is done through pointers. Thus, + by explicitly requiring it in the dialect, the appropriate language + frontend must create storage or use type that satisfies the mapping + constraint. The second possibility, `MappableType` was added because + memory/storage concept is a lower level abstraction and not all dialects + choose to use a pointer abstraction especially in the case where semantics + are more complex (such as `fir.box` which represents Fortran descriptors + and is defined in the `fir` dialect used from `flang`). * type of result of acc data clause operations - The type of the acc data clause operation is exactly the same as - `varPtr`. This was done intentionally instead of introducing an - `acc.ref/ptr` type so that IR compatibility and the dialect's + `var`. This was done intentionally instead of introducing specific `acc` + output types so that so that IR compatibility and the dialect's existing strong type checking can be maintained. This is needed since the `acc` dialect must live within another dialect whose type - system is unknown to it. The only constraint is that the appropriate - dialect type must use the `PointerLikeType` interface. + system is unknown to it. +* variable type captured in `varType` + - When `var`'s type is `PointerLikeType`, the actual type of the target + may be lost. More specifically, dialects like `llvm` which use opaque + pointers, do not record the target variable's type. The use of this field + bridges this gap. * type of decomposed clauses - Decomposed clauses, such as `acc.bounds` and `acc.declare_enter` produce types to allow their results to be used only in specific - operations. + operations. These are synthetic types solely used for proper IR + construction. + +#### Pointer-Like Requirement + +The need to have pointer-type requirement in the acc dialect stems from +a few different aspects: +- Existing dialects like `hlfir`, `fir`, `cir`, `llvm` use a pointer +representation for variables. +- Reference counters (for data clauses) are described in terms of +memory. In OpenACC spec 3.3 in section 2.6.7. It says: "A structured reference +counter is incremented when entering each data or compute region that contain an +explicit data clause or implicitly-determined data attributes for that section +of memory". This implies addressability of memory. +- Attach semantics (2.6.8 attachment counter) are specified using +"address" terminology: "The attachment counter for a pointer is set to +one whenever the pointer is attached to new target address, and +incremented whenever an attach action for that pointer is performed for +the same target address. + +#### Type Interfaces + +The `acc` dialect describes two different type interfaces which must be +implemented and attached to the source dialect's types in order to allow use +of data clause operations (eg. `acc.copyin`). They are as follows: +* `PointerLikeType` + - The idea behind this interface is that variables end up being represented + as pointers in many dialects. More specifically, `fir`, `cir`, `llvm` + represent user declared local variables with some dialect specific form of + `alloca` operation which produce pointers. Globals, similarly, are referred by + their address through some form of `address_of` operation. Additionally, an + implementation for OpenACC runtime needs to distinguish between device and + host memory - also typically done by talking about pointers. So this type + interface requirement fits in naturally with OpenACC specification. Data + mapping operation semantics can often be simply described by a pointer and + size of the data it points to. +* `MappableType` + - This interface was introduced because the `PointerLikeType` requirement + cannot represent cases when the source dialect does not use pointers. Also, + some cases, such as Fortran descriptor-backed arrays and Fortran optional + arguments, require decomposition into multiple steps. For example, in the + descriptor case, mapping of descriptor is needed, mapping of the data, and + implicit attach into device descriptor. In order to allow capturing all of + this complexity with a single data clause operation, the `MappableType` + interface was introduced. This is consistent with the dialect's goals + including being "able to regenerate the semantic equivalent of the user + pragmas". + +The intent is that a dialect's type system implements one of these two +interfaces. And to be precise, a type should only implement one or the other +(and not both) - since keeping them separate avoids ambiguity on what actually +needs mapped. When `var` is `PointerLikeType`, the assumption is that the data +pointed-to will be mapped. If the pointer-like type also implemented +`MappableType` interface, it becomes ambiguous whether the data pointed to or +the pointer itself is being mapped. ### Recipes diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h index cda07d6a91364..748cb7f28fc8c 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -25,6 +25,7 @@ #include "mlir/Dialect/OpenACC/OpenACCOpsInterfaces.h.inc" #include "mlir/Dialect/OpenACC/OpenACCTypeInterfaces.h.inc" #include "mlir/Dialect/OpenACCMPCommon/Interfaces/AtomicInterfaces.h" +#include "mlir/IR/Value.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/LoopLikeInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" @@ -83,16 +84,31 @@ namespace acc { /// combined and the final mapping value would be 5 (4 | 1). enum OpenACCExecMapping { NONE = 0, VECTOR = 1, WORKER = 2, GANG = 4 }; -/// Used to obtain the `varPtr` from a data clause operation. +/// Used to obtain the `var` from a data clause operation. /// Returns empty value if not a data clause operation or is a data exit -/// operation with no `varPtr`. -mlir::Value getVarPtr(mlir::Operation *accDataClauseOp); - -/// Used to obtain the `accPtr` from a data clause operation. -/// When a data entry operation, it obtains its result `accPtr` value. -/// If a data exit operation, it obtains its operand `accPtr` value. +/// operation with no `var`. +mlir::Value getVar(mlir::Operation *accDataClauseOp); + +/// Used to obtain the `var` from a data clause operation if it implements +/// `PointerLikeType`. +mlir::TypedValue +getVarPtr(mlir::Operation *accDataClauseOp); + +/// Used to obtains the `varType` from a data clause operation which records +/// the type of variable. When `var` is `PointerLikeType`, this returns +/// the type of the pointer target. +mlir::Type getVarType(mlir::Operation *accDataClauseOp); + +/// Used to obtain the `accVar` from a data clause operation. +/// When a data entry operation, it obtains its result `accVar` value. +/// If a data exit operation, it obtains its operand `accVar` value. /// Returns empty value if not a data clause operation. -mlir::Value getAccPtr(mlir::Operation *accDataClauseOp); +mlir::Value getAccVar(mlir::Operation *accDataClauseOp); + +/// Used to obtain the `accVar` from a data clause operation if it implements +/// `PointerLikeType`. +mlir::TypedValue +getAccPtr(mlir::Operation *accDataClauseOp); /// Used to obtain the `varPtrPtr` from a data clause operation. /// Returns empty value if not a data clause operation. @@ -136,6 +152,18 @@ mlir::ValueRange getDataOperands(mlir::Operation *accOp); /// Used to get a mutable range iterating over the data operands. mlir::MutableOperandRange getMutableDataOperands(mlir::Operation *accOp); +/// Used to check whether the provided `type` implements the `PointerLikeType` +/// interface. +inline bool isPointerLikeType(mlir::Type type) { + return mlir::isa(type); +} + +/// Used to check whether the provided `type` implements the `MappableType` +/// interface. +inline bool isMappableType(mlir::Type type) { + return mlir::isa(type); +} + /// Used to obtain the attribute name for declare. static constexpr StringLiteral getDeclareAttrName() { return StringLiteral("acc.declare"); diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 3ac265ac68756..a47f70b168066 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -70,7 +70,14 @@ def IntOrIndex : AnyTypeOf<[AnyInteger, Index]>; // Simple alias to pointer-like interface to reduce verbosity. def OpenACC_PointerLikeType : TypeAlias; + "pointer-like type">; +def OpenACC_MappableType : TypeAlias; + +def OpenACC_AnyPointerOrMappableLike : TypeConstraint, "any pointer or mappable">; +def OpenACC_AnyPointerOrMappableType : Type; // Define the OpenACC data clauses. There are a few cases where a modifier // is used, like create(zero), copyin(readonly), and copyout(zero). Since in @@ -353,7 +360,8 @@ def OpenACC_DataBoundsOp : OpenACC_Op<"bounds", build($_builder, $_state, ::mlir::acc::DataBoundsType::get($_builder.getContext()), /*lowerbound=*/{}, /*upperbound=*/{}, extent, - /*stride=*/{}, /*strideInBytes=*/nullptr, /*startIdx=*/{}); + /*stride=*/{}, /*strideInBytes=*/$_builder.getBoolAttr(false), + /*startIdx=*/{}); }] >, OpBuilder<(ins "::mlir::Value":$lowerbound, @@ -361,7 +369,8 @@ def OpenACC_DataBoundsOp : OpenACC_Op<"bounds", build($_builder, $_state, ::mlir::acc::DataBoundsType::get($_builder.getContext()), lowerbound, upperbound, /*extent=*/{}, - /*stride=*/{}, /*strideInBytes=*/nullptr, /*startIdx=*/{}); + /*stride=*/{}, /*strideInBytes=*/$_builder.getBoolAttr(false), + /*startIdx=*/{}); }] > ]; @@ -396,10 +405,15 @@ class OpenACC_DataEntryOp getVarPtr() { + return mlir::dyn_cast>(getVar()); + } + mlir::TypedValue getAccPtr() { + return mlir::dyn_cast>(getAccVar()); + } }]; let assemblyFormat = [{ - `varPtr` `(` $varPtr `:` custom(type($varPtr), $varType) + custom($var) `:` custom(type($var), $varType) oilist( `varPtrPtr` `(` $varPtrPtr `:` type($varPtrPtr) `)` | `bounds` `(` $bounds `)` | `async` `(` custom($asyncOperands, type($asyncOperands), $asyncOperandsDeviceType) `)` - ) `->` type($accPtr) attr-dict + ) `->` type($accVar) attr-dict }]; let hasVerifier = 1; - let builders = [OpBuilder<(ins "::mlir::Value":$varPtr, "bool":$structured, - "bool":$implicit, - CArg<"::mlir::ValueRange", "{}">:$bounds), - [{ + let builders = [ + OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr, + "bool":$structured, "bool":$implicit, + CArg<"::mlir::ValueRange", "{}">:$bounds), + [{ build($_builder, $_state, varPtr.getType(), varPtr, /*varType=*/::mlir::TypeAttr::get( - ::mlir::cast<::mlir::acc::PointerLikeType>( - varPtr.getType()).getElementType()), + varPtr.getType().getElementType()), /*varPtrPtr=*/{}, bounds, /*asyncOperands=*/{}, /*asyncOperandsDeviceType=*/nullptr, /*asyncOnly=*/nullptr, /*dataClause=*/nullptr, /*structured=*/$_builder.getBoolAttr(structured), /*implicit=*/$_builder.getBoolAttr(implicit), /*name=*/nullptr); }]>, - OpBuilder<(ins "::mlir::Value":$varPtr, "bool":$structured, - "bool":$implicit, "const ::llvm::Twine &":$name, - CArg<"::mlir::ValueRange", "{}">:$bounds), - [{ + OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr, + "bool":$structured, "bool":$implicit, + "const ::llvm::Twine &":$name, + CArg<"::mlir::ValueRange", "{}">:$bounds), + [{ build($_builder, $_state, varPtr.getType(), varPtr, /*varType=*/::mlir::TypeAttr::get( - ::mlir::cast<::mlir::acc::PointerLikeType>( - varPtr.getType()).getElementType()), + varPtr.getType().getElementType()), + /*varPtrPtr=*/{}, bounds, /*asyncOperands=*/{}, + /*asyncOperandsDeviceType=*/nullptr, + /*asyncOnly=*/nullptr, /*dataClause=*/nullptr, + /*structured=*/$_builder.getBoolAttr(structured), + /*implicit=*/$_builder.getBoolAttr(implicit), + /*name=*/$_builder.getStringAttr(name)); + }]>, + OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::MappableType>":$var, + "bool":$structured, "bool":$implicit, + CArg<"::mlir::ValueRange", "{}">:$bounds), + [{ + build($_builder, $_state, var.getType(), var, + /*varType=*/::mlir::TypeAttr::get(var.getType()), + /*varPtrPtr=*/{}, bounds, /*asyncOperands=*/{}, + /*asyncOperandsDeviceType=*/nullptr, + /*asyncOnly=*/nullptr, /*dataClause=*/nullptr, + /*structured=*/$_builder.getBoolAttr(structured), + /*implicit=*/$_builder.getBoolAttr(implicit), /*name=*/nullptr); + }]>, + OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::MappableType>":$var, + "bool":$structured, "bool":$implicit, + "const ::llvm::Twine &":$name, + CArg<"::mlir::ValueRange", "{}">:$bounds), + [{ + build($_builder, $_state, var.getType(), var, + /*varType=*/::mlir::TypeAttr::get(var.getType()), /*varPtrPtr=*/{}, bounds, /*asyncOperands=*/{}, /*asyncOperandsDeviceType=*/nullptr, /*asyncOnly=*/nullptr, /*dataClause=*/nullptr, @@ -506,10 +552,10 @@ class OpenACC_DataEntryOp { + (ins OpenACC_AnyPointerOrMappableType:$var)> { let summary = "Represents private semantics for acc private clause."; - let results = (outs Arg:$accPtr); + let results = (outs Arg:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -518,11 +564,11 @@ def OpenACC_PrivateOp : OpenACC_DataEntryOp<"private", //===----------------------------------------------------------------------===// def OpenACC_FirstprivateOp : OpenACC_DataEntryOp<"firstprivate", "mlir::acc::DataClause::acc_firstprivate", "", [], - (ins Arg:$varPtr)> { + (ins Arg:$var)> { let summary = "Represents firstprivate semantic for the acc firstprivate " "clause."; - let results = (outs Arg:$accPtr); + let results = (outs Arg:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -531,10 +577,10 @@ def OpenACC_FirstprivateOp : OpenACC_DataEntryOp<"firstprivate", //===----------------------------------------------------------------------===// def OpenACC_ReductionOp : OpenACC_DataEntryOp<"reduction", "mlir::acc::DataClause::acc_reduction", "", [], - (ins Arg:$varPtr)> { + (ins Arg:$var)> { let summary = "Represents reduction semantics for acc reduction clause."; - let results = (outs Arg:$accPtr); + let results = (outs Arg:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -544,9 +590,9 @@ def OpenACC_ReductionOp : OpenACC_DataEntryOp<"reduction", def OpenACC_DevicePtrOp : OpenACC_DataEntryOp<"deviceptr", "mlir::acc::DataClause::acc_deviceptr", "", [MemoryEffects<[MemRead]>], - (ins OpenACC_PointerLikeTypeInterface:$varPtr)> { + (ins OpenACC_AnyPointerOrMappableType:$var)> { let summary = "Specifies that the variable pointer is a device pointer."; - let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr); + let results = (outs OpenACC_AnyPointerOrMappableType:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -557,9 +603,9 @@ def OpenACC_PresentOp : OpenACC_DataEntryOp<"present", "mlir::acc::DataClause::acc_present", "", [MemoryEffects<[MemRead, MemWrite]>], - (ins OpenACC_PointerLikeTypeInterface:$varPtr)> { + (ins OpenACC_AnyPointerOrMappableType:$var)> { let summary = "Specifies that the variable is already present on device."; - let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr); + let results = (outs OpenACC_AnyPointerOrMappableType:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -570,11 +616,11 @@ def OpenACC_CopyinOp : OpenACC_DataEntryOp<"copyin", "mlir::acc::DataClause::acc_copyin", "", [MemoryEffects<[MemRead, MemWrite]>], - (ins Arg:$varPtr)> { + (ins Arg:$var)> { let summary = "Represents copyin semantics for acc data clauses like acc " "copyin and acc copy."; - let results = (outs Arg:$accPtr); + let results = (outs Arg:$accVar); let extraClassDeclaration = extraClassDeclarationBase # [{ /// Check if this is a copyin with readonly modifier. @@ -589,11 +635,11 @@ def OpenACC_CreateOp : OpenACC_DataEntryOp<"create", "mlir::acc::DataClause::acc_create", "", [MemoryEffects<[MemRead, MemWrite]>], - (ins OpenACC_PointerLikeTypeInterface:$varPtr)> { + (ins OpenACC_AnyPointerOrMappableType:$var)> { let summary = "Represents create semantics for acc data clauses like acc " "create and acc copyout."; - let results = (outs Arg:$accPtr); + let results = (outs Arg:$accVar); let extraClassDeclaration = extraClassDeclarationBase # [{ /// Check if this is a create with zero modifier. @@ -608,9 +654,9 @@ def OpenACC_NoCreateOp : OpenACC_DataEntryOp<"nocreate", "mlir::acc::DataClause::acc_no_create", "", [MemoryEffects<[MemRead, MemWrite]>], - (ins OpenACC_PointerLikeTypeInterface:$varPtr)> { + (ins OpenACC_AnyPointerOrMappableType:$var)> { let summary = "Represents acc no_create semantics."; - let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr); + let results = (outs OpenACC_AnyPointerOrMappableType:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -621,11 +667,11 @@ def OpenACC_AttachOp : OpenACC_DataEntryOp<"attach", "mlir::acc::DataClause::acc_attach", "", [MemoryEffects<[MemRead, MemWrite]>], - (ins Arg:$varPtr)> { + (ins Arg:$var)> { let summary = "Represents acc attach semantics which updates a pointer in " "device memory with the corresponding device address of the " "pointee."; - let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr); + let results = (outs OpenACC_AnyPointerOrMappableType:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -645,9 +691,9 @@ def OpenACC_GetDevicePtrOp : OpenACC_DataEntryOp<"getdeviceptr", that is any of the valid `mlir::acc::DataClause` entries. \ }], [MemoryEffects<[MemRead]>], - (ins OpenACC_PointerLikeTypeInterface:$varPtr)> { + (ins OpenACC_AnyPointerOrMappableType:$var)> { let summary = "Gets device address if variable exists on device."; - let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr); + let results = (outs OpenACC_AnyPointerOrMappableType:$accVar); let hasVerifier = 0; let extraClassDeclaration = extraClassDeclarationBase; } @@ -657,10 +703,10 @@ def OpenACC_GetDevicePtrOp : OpenACC_DataEntryOp<"getdeviceptr", //===----------------------------------------------------------------------===// def OpenACC_UpdateDeviceOp : OpenACC_DataEntryOp<"update_device", "mlir::acc::DataClause::acc_update_device", "", [], - (ins Arg:$varPtr)> { + (ins Arg:$var)> { let summary = "Represents acc update device semantics."; - let results = (outs Arg:$accPtr); + let results = (outs Arg:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -670,9 +716,9 @@ def OpenACC_UpdateDeviceOp : OpenACC_DataEntryOp<"update_device", def OpenACC_UseDeviceOp : OpenACC_DataEntryOp<"use_device", "mlir::acc::DataClause::acc_use_device", "", [MemoryEffects<[MemRead]>], - (ins OpenACC_PointerLikeTypeInterface:$varPtr)> { + (ins OpenACC_AnyPointerOrMappableType:$var)> { let summary = "Represents acc use_device semantics."; - let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr); + let results = (outs OpenACC_AnyPointerOrMappableType:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -682,9 +728,9 @@ def OpenACC_UseDeviceOp : OpenACC_DataEntryOp<"use_device", def OpenACC_DeclareDeviceResidentOp : OpenACC_DataEntryOp<"declare_device_resident", "mlir::acc::DataClause::acc_declare_device_resident", "", [MemoryEffects<[MemWrite]>], - (ins Arg:$varPtr)> { + (ins Arg:$var)> { let summary = "Represents acc declare device_resident semantics."; - let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr); + let results = (outs OpenACC_AnyPointerOrMappableType:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -694,9 +740,9 @@ def OpenACC_DeclareDeviceResidentOp : OpenACC_DataEntryOp<"declare_device_reside def OpenACC_DeclareLinkOp : OpenACC_DataEntryOp<"declare_link", "mlir::acc::DataClause::acc_declare_link", "", [MemoryEffects<[MemWrite]>], - (ins Arg:$varPtr)> { + (ins Arg:$var)> { let summary = "Represents acc declare link semantics."; - let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr); + let results = (outs OpenACC_AnyPointerOrMappableType:$accVar); let extraClassDeclaration = extraClassDeclarationBase; } @@ -705,10 +751,10 @@ def OpenACC_DeclareLinkOp : OpenACC_DataEntryOp<"declare_link", //===----------------------------------------------------------------------===// def OpenACC_CacheOp : OpenACC_DataEntryOp<"cache", "mlir::acc::DataClause::acc_cache", "", [NoMemoryEffect], - (ins OpenACC_PointerLikeTypeInterface:$varPtr)> { + (ins OpenACC_AnyPointerOrMappableType:$var)> { let summary = "Represents the cache directive that is associated with a " "loop."; - let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr); + let results = (outs OpenACC_AnyPointerOrMappableType:$accVar); let extraClassDeclaration = extraClassDeclarationBase # [{ /// Check if this is a cache with readonly modifier. @@ -738,7 +784,7 @@ class OpenACC_DataExitOp:$name)); let description = !strconcat(extraDescription, [{ - - `accPtr`: The acc address of variable. This is the link from the data-entry + - `accVar`: The acc variable. This is the link from the data-entry operation used. - `bounds`: Used when copying just slice of array or array's bounds are not encoded in type. They are in rank order where rank 0 is inner-most dimension. @@ -808,57 +854,67 @@ class OpenACC_DataExitOpWithVarPtr [MemoryEffects<[MemRead, MemWrite]>], (ins Arg:$accPtr, + "Accelerator mapped variable", [MemRead]>:$accVar, Arg:$varPtr, + "Host variable", [MemWrite]>:$var, TypeAttr:$varType)> { let assemblyFormat = [{ - `accPtr` `(` $accPtr `:` type($accPtr) `)` + custom($accVar, type($accVar)) (`bounds` `(` $bounds^ `)` )? (`async` `(` custom($asyncOperands, type($asyncOperands), $asyncOperandsDeviceType)^ `)`)? - `to` `varPtr` `(` $varPtr `:` custom(type($varPtr), $varType) + `to` custom($var) `:` custom(type($var), $varType) attr-dict }]; - let builders = [OpBuilder<(ins "::mlir::Value":$accPtr, - "::mlir::Value":$varPtr, "bool":$structured, - "bool":$implicit, - CArg<"::mlir::ValueRange", "{}">:$bounds), - [{ + let builders = [ + OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$accPtr, + "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr, + "bool":$structured, "bool":$implicit, + CArg<"::mlir::ValueRange", "{}">:$bounds), + [{ build($_builder, $_state, accPtr, varPtr, /*varType=*/::mlir::TypeAttr::get( - ::mlir::cast<::mlir::acc::PointerLikeType>( - varPtr.getType()).getElementType()), + varPtr.getType().getElementType()), bounds, /*asyncOperands=*/{}, /*asyncOperandsDeviceType=*/nullptr, /*asyncOnly=*/nullptr, /*dataClause=*/nullptr, /*structured=*/$_builder.getBoolAttr(structured), /*implicit=*/$_builder.getBoolAttr(implicit), /*name=*/nullptr); }]>, - OpBuilder<(ins "::mlir::Value":$accPtr, - "::mlir::Value":$varPtr, "bool":$structured, - "bool":$implicit, "const ::llvm::Twine &":$name, - CArg<"::mlir::ValueRange", "{}">:$bounds), - [{ + OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$accPtr, + "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr, + "bool":$structured, "bool":$implicit, + "const ::llvm::Twine &":$name, + CArg<"::mlir::ValueRange", "{}">:$bounds), + [{ build($_builder, $_state, accPtr, varPtr, /*varType=*/::mlir::TypeAttr::get( - ::mlir::cast<::mlir::acc::PointerLikeType>( - varPtr.getType()).getElementType()), + varPtr.getType().getElementType()), bounds, /*asyncOperands=*/{}, /*asyncOperandsDeviceType=*/nullptr, /*asyncOnly=*/nullptr, /*dataClause=*/nullptr, /*structured=*/$_builder.getBoolAttr(structured), /*implicit=*/$_builder.getBoolAttr(implicit), /*name=*/$_builder.getStringAttr(name)); }]>]; + + code extraClassDeclarationDataExit = [{ + mlir::TypedValue getVarPtr() { + return mlir::dyn_cast>(getVar()); + } + mlir::TypedValue getAccPtr() { + return mlir::dyn_cast>(getAccVar()); + } + }]; } class OpenACC_DataExitOpNoVarPtr : OpenACC_DataExitOp, MemWrite]>], - (ins Arg:$accPtr)> { + (ins Arg:$accVar)> { let assemblyFormat = [{ - `accPtr` `(` $accPtr `:` type($accPtr) `)` + custom($accVar, type($accVar)) (`bounds` `(` $bounds^ `)` )? (`async` `(` custom($asyncOperands, type($asyncOperands), $asyncOperandsDeviceType)^ `)`)? @@ -866,31 +922,35 @@ class OpenACC_DataExitOpNoVarPtr : }]; let builders = [ - OpBuilder<(ins "::mlir::Value":$accPtr, - "bool":$structured, - "bool":$implicit, - CArg<"::mlir::ValueRange", "{}">:$bounds), [{ + OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$accPtr, + "bool":$structured, "bool":$implicit, + CArg<"::mlir::ValueRange", "{}">:$bounds), + [{ build($_builder, $_state, accPtr, bounds, /*asyncOperands=*/{}, /*asyncOperandsDeviceType=*/nullptr, /*asyncOnly=*/nullptr, /*dataClause=*/nullptr, /*structured=*/$_builder.getBoolAttr(structured), /*implicit=*/$_builder.getBoolAttr(implicit), /*name=*/nullptr); - }] - >, - OpBuilder<(ins "::mlir::Value":$accPtr, - "bool":$structured, - "bool":$implicit, - "const ::llvm::Twine &":$name, - CArg<"::mlir::ValueRange", "{}">:$bounds), [{ + }]>, + OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$accPtr, + "bool":$structured, "bool":$implicit, + "const ::llvm::Twine &":$name, + CArg<"::mlir::ValueRange", "{}">:$bounds), + [{ build($_builder, $_state, accPtr, bounds, /*asyncOperands=*/{}, /*asyncOperandsDeviceType=*/nullptr, /*asyncOnly=*/nullptr, /*dataClause=*/nullptr, /*structured=*/$_builder.getBoolAttr(structured), /*implicit=*/$_builder.getBoolAttr(implicit), /*name=*/$_builder.getStringAttr(name)); - }] - > + }]> ]; + + code extraClassDeclarationDataExit = [{ + mlir::TypedValue getAccPtr() { + return mlir::dyn_cast>(getAccVar()); + } + }]; } //===----------------------------------------------------------------------===// @@ -900,7 +960,7 @@ def OpenACC_CopyoutOp : OpenACC_DataExitOpWithVarPtr<"copyout", "mlir::acc::DataClause::acc_copyout"> { let summary = "Represents acc copyout semantics - reverse of copyin."; - let extraClassDeclaration = extraClassDeclarationBase # [{ + let extraClassDeclaration = extraClassDeclarationBase # extraClassDeclarationDataExit # [{ /// Check if this is a copyout with zero modifier. bool isCopyoutZero(); }]; @@ -912,7 +972,7 @@ def OpenACC_CopyoutOp : OpenACC_DataExitOpWithVarPtr<"copyout", def OpenACC_DeleteOp : OpenACC_DataExitOpNoVarPtr<"delete", "mlir::acc::DataClause::acc_delete"> { let summary = "Represents acc delete semantics - reverse of create."; - let extraClassDeclaration = extraClassDeclarationBase; + let extraClassDeclaration = extraClassDeclarationBase # extraClassDeclarationDataExit; } //===----------------------------------------------------------------------===// @@ -921,7 +981,7 @@ def OpenACC_DeleteOp : OpenACC_DataExitOpNoVarPtr<"delete", def OpenACC_DetachOp : OpenACC_DataExitOpNoVarPtr<"detach", "mlir::acc::DataClause::acc_detach"> { let summary = "Represents acc detach semantics - reverse of attach."; - let extraClassDeclaration = extraClassDeclarationBase; + let extraClassDeclaration = extraClassDeclarationBase # extraClassDeclarationDataExit; } //===----------------------------------------------------------------------===// @@ -930,7 +990,7 @@ def OpenACC_DetachOp : OpenACC_DataExitOpNoVarPtr<"detach", def OpenACC_UpdateHostOp : OpenACC_DataExitOpWithVarPtr<"update_host", "mlir::acc::DataClause::acc_update_host"> { let summary = "Represents acc update host semantics."; - let extraClassDeclaration = extraClassDeclarationBase # [{ + let extraClassDeclaration = extraClassDeclarationBase # extraClassDeclarationDataExit # [{ /// Check if this is an acc update self. bool isSelf() { return getDataClause() == acc::DataClause::acc_update_self; @@ -1193,11 +1253,11 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel", UnitAttr:$selfAttr, Variadic:$reductionOperands, OptionalAttr:$reductionRecipes, - Variadic:$privateOperands, + Variadic:$privateOperands, OptionalAttr:$privatizations, - Variadic:$firstprivateOperands, + Variadic:$firstprivateOperands, OptionalAttr:$firstprivatizations, - Variadic:$dataClauseOperands, + Variadic:$dataClauseOperands, OptionalAttr:$defaultAttr, UnitAttr:$combined); @@ -1353,11 +1413,11 @@ def OpenACC_SerialOp : OpenACC_Op<"serial", UnitAttr:$selfAttr, Variadic:$reductionOperands, OptionalAttr:$reductionRecipes, - Variadic:$privateOperands, + Variadic:$privateOperands, OptionalAttr:$privatizations, - Variadic:$firstprivateOperands, + Variadic:$firstprivateOperands, OptionalAttr:$firstprivatizations, - Variadic:$dataClauseOperands, + Variadic:$dataClauseOperands, OptionalAttr:$defaultAttr, UnitAttr:$combined); @@ -1482,7 +1542,7 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels", Optional:$ifCond, Optional:$selfCond, UnitAttr:$selfAttr, - Variadic:$dataClauseOperands, + Variadic:$dataClauseOperands, OptionalAttr:$defaultAttr, UnitAttr:$combined); @@ -1614,7 +1674,7 @@ def OpenACC_DataOp : OpenACC_Op<"data", OptionalAttr:$waitOperandsDeviceType, OptionalAttr:$hasWaitDevnum, OptionalAttr:$waitOnly, - Variadic:$dataClauseOperands, + Variadic:$dataClauseOperands, OptionalAttr:$defaultAttr); let regions = (region AnyRegion:$region); @@ -1709,7 +1769,7 @@ def OpenACC_EnterDataOp : OpenACC_Op<"enter_data", Optional:$waitDevnum, Variadic:$waitOperands, UnitAttr:$wait, - Variadic:$dataClauseOperands); + Variadic:$dataClauseOperands); let extraClassDeclaration = [{ /// The number of data operands. @@ -1760,7 +1820,7 @@ def OpenACC_ExitDataOp : OpenACC_Op<"exit_data", Optional:$waitDevnum, Variadic:$waitOperands, UnitAttr:$wait, - Variadic:$dataClauseOperands, + Variadic:$dataClauseOperands, UnitAttr:$finalize); let extraClassDeclaration = [{ @@ -1810,7 +1870,7 @@ def OpenACC_HostDataOp : OpenACC_Op<"host_data", }]; let arguments = (ins Optional:$ifCond, - Variadic:$dataClauseOperands, + Variadic:$dataClauseOperands, UnitAttr:$ifPresent); let regions = (region AnyRegion:$region); @@ -1887,8 +1947,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", Variadic:$tileOperands, OptionalAttr:$tileOperandsSegments, OptionalAttr:$tileOperandsDeviceType, - Variadic:$cacheOperands, - Variadic:$privateOperands, + Variadic:$cacheOperands, + Variadic:$privateOperands, OptionalAttr:$privatizations, Variadic:$reductionOperands, OptionalAttr:$reductionRecipes, @@ -2201,7 +2261,7 @@ def OpenACC_DeclareEnterOp : OpenACC_Op<"declare_enter", ``` }]; - let arguments = (ins Variadic:$dataClauseOperands); + let arguments = (ins Variadic:$dataClauseOperands); let results = (outs OpenACC_DeclareTokenType:$token); let assemblyFormat = [{ @@ -2236,7 +2296,7 @@ def OpenACC_DeclareExitOp : OpenACC_Op<"declare_exit", let arguments = (ins Optional:$token, - Variadic:$dataClauseOperands); + Variadic:$dataClauseOperands); let assemblyFormat = [{ oilist( @@ -2338,7 +2398,7 @@ def OpenACC_DeclareOp : OpenACC_Op<"declare", }]; let arguments = (ins - Variadic:$dataClauseOperands); + Variadic:$dataClauseOperands); let regions = (region AnyRegion:$region); @@ -2589,7 +2649,7 @@ def OpenACC_UpdateOp : OpenACC_Op<"update", OptionalAttr:$waitOperandsDeviceType, OptionalAttr:$hasWaitDevnum, OptionalAttr:$waitOnly, - Variadic:$dataClauseOperands, + Variadic:$dataClauseOperands, UnitAttr:$ifPresent); let extraClassDeclaration = [{ diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td index 0a3edd5637704..bec46be89f058 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td @@ -31,4 +31,97 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> { ]; } +def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> { + let cppNamespace = "::mlir::acc"; + + let description = [{ + An interface to capture type-based semantics for mapping in a manner that + makes it convertible to size-based semantics. + }]; + + let methods = [ + InterfaceMethod< + /*description=*/[{ + Returns the pointer to the `var` if recoverable (such as in cases + where the current operation is a load from a memory slot). + }], + /*retTy=*/"::mlir::TypedValue<::mlir::acc::PointerLikeType>", + /*methodName=*/"getVarPtr", + /*args=*/(ins "::mlir::Value":$var), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + if (auto ptr = mlir::dyn_cast>( + var)) + return ptr; + return {}; + }] + >, + InterfaceMethod< + /*description=*/[{ + Returns the size in bytes when computable. If this is an array-like + type, avoiding passing `accBounds` ensures a computation of the size + of whole type. + }], + /*retTy=*/"::std::optional<::llvm::TypeSize>", + /*methodName=*/"getSizeInBytes", + /*args=*/(ins "::mlir::Value":$var, + "::mlir::ValueRange":$accBounds, + "const ::mlir::DataLayout &":$dataLayout), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + // Bounds operations are typically created for array types. In the + // generic implementation, it is not straightforward to distinguish + // between array types and ensure the size and offset take into account + // just the slice requested. Thus return not-computable for now. + if (!accBounds.empty()) + return {}; + return {dataLayout.getTypeSize($_type)}; + }] + >, + InterfaceMethod< + /*description=*/[{ + Returns the offset in bytes when computable. + }], + /*retTy=*/"::std::optional<::int64_t>", + /*methodName=*/"getOffsetInBytes", + /*args=*/(ins "::mlir::Value":$var, + "::mlir::ValueRange":$accBounds, + "const ::mlir::DataLayout &":$dataLayout), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + // Bounds operations are typically created for array types. In the + // generic implementation, it is not straightforward to distinguish + // between array types and ensure the size and offset take into account + // just the slice requested. Thus return not-computable for now. + if (!accBounds.empty()) + return {}; + + // If the type size is computable, it means it is trivial. Assume + // offset of 0. + if (::mlir::cast<::mlir::acc::MappableType>($_type).getSizeInBytes( + var, accBounds, dataLayout).has_value()) { + return {0}; + } + + return {}; + }] + >, + InterfaceMethod< + /*description=*/[{ + Returns explicit `acc.bounds` operations that envelop the whole + data structure. These operations are inserted using the provided builder + at the location set before calling this API. + }], + /*retTy=*/"::llvm::SmallVector<::mlir::Value>", + /*methodName=*/"generateAccBounds", + /*args=*/(ins "::mlir::Value":$var, + "::mlir::OpBuilder &":$builder), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return {}; + }] + >, + ]; +} + #endif // OPENACC_TYPE_INTERFACES diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index d490376db0e7f..735dd4fd7b8bb 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -16,6 +16,7 @@ #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/OpImplementation.h" +#include "mlir/Support/LLVM.h" #include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/TypeSwitch.h" @@ -192,6 +193,97 @@ static LogicalResult checkWaitAndAsyncConflict(Op op) { return success(); } +template +static LogicalResult checkVarAndVarType(Op op) { + if (!op.getVar()) + return op.emitError("must have var operand"); + + if (mlir::isa(op.getVar().getType()) && + mlir::isa(op.getVar().getType())) { + // TODO: If a type implements both interfaces (mappable and pointer-like), + // it is unclear which semantics to apply without additional info which + // would need captured in the data operation. For now restrict this case + // unless a compelling reason to support disambiguating between the two. + return op.emitError("var must be mappable or pointer-like (not both)"); + } + + if (!mlir::isa(op.getVar().getType()) && + !mlir::isa(op.getVar().getType())) + return op.emitError("var must be mappable or pointer-like"); + + if (mlir::isa(op.getVar().getType()) && + op.getVarType() != op.getVar().getType()) + return op.emitError("varType must match when var is mappable"); + + return success(); +} + +template +static LogicalResult checkVarAndAccVar(Op op) { + if (op.getVar().getType() != op.getAccVar().getType()) + return op.emitError("input and output types must match"); + + return success(); +} + +static ParseResult parseVar(mlir::OpAsmParser &parser, + OpAsmParser::UnresolvedOperand &var) { + // Either `var` or `varPtr` keyword is required. + if (failed(parser.parseOptionalKeyword("varPtr"))) { + if (failed(parser.parseKeyword("var"))) + return failure(); + } + if (failed(parser.parseLParen())) + return failure(); + if (failed(parser.parseOperand(var))) + return failure(); + + return success(); +} + +static void printVar(mlir::OpAsmPrinter &p, mlir::Operation *op, + mlir::Value var) { + if (mlir::isa(var.getType())) + p << "varPtr("; + else + p << "var("; + p.printOperand(var); +} + +static ParseResult parseAccVar(mlir::OpAsmParser &parser, + OpAsmParser::UnresolvedOperand &var, + mlir::Type &accVarType) { + // Either `accVar` or `accPtr` keyword is required. + if (failed(parser.parseOptionalKeyword("accPtr"))) { + if (failed(parser.parseKeyword("accVar"))) + return failure(); + } + if (failed(parser.parseLParen())) + return failure(); + if (failed(parser.parseOperand(var))) + return failure(); + if (failed(parser.parseColon())) + return failure(); + if (failed(parser.parseType(accVarType))) + return failure(); + if (failed(parser.parseRParen())) + return failure(); + + return success(); +} + +static void printAccVar(mlir::OpAsmPrinter &p, mlir::Operation *op, + mlir::Value accVar, mlir::Type accVarType) { + if (mlir::isa(accVar.getType())) + p << "accPtr("; + else + p << "accVar("; + p.printOperand(accVar); + p << " : "; + p.printType(accVarType); + p << ")"; +} + static ParseResult parseVarPtrType(mlir::OpAsmParser &parser, mlir::Type &varPtrType, mlir::TypeAttr &varTypeAttr) { @@ -211,8 +303,11 @@ static ParseResult parseVarPtrType(mlir::OpAsmParser &parser, return failure(); } else { // Set `varType` from the element type of the type of `varPtr`. - varTypeAttr = mlir::TypeAttr::get( - mlir::cast(varPtrType).getElementType()); + if (mlir::isa(varPtrType)) + varTypeAttr = mlir::TypeAttr::get( + mlir::cast(varPtrType).getElementType()); + else + varTypeAttr = mlir::TypeAttr::get(varPtrType); } return success(); @@ -226,8 +321,11 @@ static void printVarPtrType(mlir::OpAsmPrinter &p, mlir::Operation *op, // Print the `varType` only if it differs from the element type of // `varPtr`'s type. mlir::Type varType = varTypeAttr.getValue(); - if (mlir::cast(varPtrType).getElementType() != - varType) { + mlir::Type typeToCheckAgainst = + mlir::isa(varPtrType) + ? mlir::cast(varPtrType).getElementType() + : varPtrType; + if (typeToCheckAgainst != varType) { p << " varType("; p.printType(varType); p << ")"; @@ -252,6 +350,8 @@ LogicalResult acc::PrivateOp::verify() { if (getDataClause() != acc::DataClause::acc_private) return emitError( "data clause associated with private operation must match its intent"); + if (failed(checkVarAndVarType(*this))) + return failure(); return success(); } @@ -262,6 +362,8 @@ LogicalResult acc::FirstprivateOp::verify() { if (getDataClause() != acc::DataClause::acc_firstprivate) return emitError("data clause associated with firstprivate operation must " "match its intent"); + if (failed(checkVarAndVarType(*this))) + return failure(); return success(); } @@ -272,6 +374,8 @@ LogicalResult acc::ReductionOp::verify() { if (getDataClause() != acc::DataClause::acc_reduction) return emitError("data clause associated with reduction operation must " "match its intent"); + if (failed(checkVarAndVarType(*this))) + return failure(); return success(); } @@ -282,6 +386,10 @@ LogicalResult acc::DevicePtrOp::verify() { if (getDataClause() != acc::DataClause::acc_deviceptr) return emitError("data clause associated with deviceptr operation must " "match its intent"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -292,6 +400,10 @@ LogicalResult acc::PresentOp::verify() { if (getDataClause() != acc::DataClause::acc_present) return emitError( "data clause associated with present operation must match its intent"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -307,6 +419,10 @@ LogicalResult acc::CopyinOp::verify() { return emitError( "data clause associated with copyin operation must match its intent" " or specify original clause this operation was decomposed from"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -326,6 +442,10 @@ LogicalResult acc::CreateOp::verify() { return emitError( "data clause associated with create operation must match its intent" " or specify original clause this operation was decomposed from"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -342,6 +462,10 @@ LogicalResult acc::NoCreateOp::verify() { if (getDataClause() != acc::DataClause::acc_no_create) return emitError("data clause associated with no_create operation must " "match its intent"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -352,6 +476,10 @@ LogicalResult acc::AttachOp::verify() { if (getDataClause() != acc::DataClause::acc_attach) return emitError( "data clause associated with attach operation must match its intent"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -363,6 +491,10 @@ LogicalResult acc::DeclareDeviceResidentOp::verify() { if (getDataClause() != acc::DataClause::acc_declare_device_resident) return emitError("data clause associated with device_resident operation " "must match its intent"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -374,6 +506,10 @@ LogicalResult acc::DeclareLinkOp::verify() { if (getDataClause() != acc::DataClause::acc_declare_link) return emitError( "data clause associated with link operation must match its intent"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -389,8 +525,12 @@ LogicalResult acc::CopyoutOp::verify() { return emitError( "data clause associated with copyout operation must match its intent" " or specify original clause this operation was decomposed from"); - if (!getVarPtr() || !getAccPtr()) + if (!getVar() || !getAccVar()) return emitError("must have both host and device pointers"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -414,7 +554,7 @@ LogicalResult acc::DeleteOp::verify() { return emitError( "data clause associated with delete operation must match its intent" " or specify original clause this operation was decomposed from"); - if (!getAccPtr()) + if (!getAccVar()) return emitError("must have device pointer"); return success(); } @@ -429,7 +569,7 @@ LogicalResult acc::DetachOp::verify() { return emitError( "data clause associated with detach operation must match its intent" " or specify original clause this operation was decomposed from"); - if (!getAccPtr()) + if (!getAccVar()) return emitError("must have device pointer"); return success(); } @@ -444,8 +584,12 @@ LogicalResult acc::UpdateHostOp::verify() { return emitError( "data clause associated with host operation must match its intent" " or specify original clause this operation was decomposed from"); - if (!getVarPtr() || !getAccPtr()) + if (!getVar() || !getAccVar()) return emitError("must have both host and device pointers"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -458,6 +602,10 @@ LogicalResult acc::UpdateDeviceOp::verify() { return emitError( "data clause associated with device operation must match its intent" " or specify original clause this operation was decomposed from"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -470,6 +618,10 @@ LogicalResult acc::UseDeviceOp::verify() { return emitError( "data clause associated with use_device operation must match its intent" " or specify original clause this operation was decomposed from"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -483,6 +635,10 @@ LogicalResult acc::CacheOp::verify() { return emitError( "data clause associated with cache operation must match its intent" " or specify original clause this operation was decomposed from"); + if (failed(checkVarAndVarType(*this))) + return failure(); + if (failed(checkVarAndAccVar(*this))) + return failure(); return success(); } @@ -502,7 +658,7 @@ static ParseResult parseRegions(OpAsmParser &parser, OperationState &state, } static bool isComputeOperation(Operation *op) { - return isa(op); + return isa(op); } namespace { @@ -2917,20 +3073,56 @@ LogicalResult acc::WaitOp::verify() { // acc dialect utilities //===----------------------------------------------------------------------===// -mlir::Value mlir::acc::getVarPtr(mlir::Operation *accDataClauseOp) { - auto varPtr{llvm::TypeSwitch(accDataClauseOp) +mlir::TypedValue +mlir::acc::getVarPtr(mlir::Operation *accDataClauseOp) { + auto varPtr{llvm::TypeSwitch>( + accDataClauseOp) .Case( [&](auto entry) { return entry.getVarPtr(); }) .Case( [&](auto exit) { return exit.getVarPtr(); }) - .Default([&](mlir::Operation *) { return mlir::Value(); })}; + .Default([&](mlir::Operation *) { + return mlir::TypedValue(); + })}; return varPtr; } -mlir::Value mlir::acc::getAccPtr(mlir::Operation *accDataClauseOp) { - auto accPtr{llvm::TypeSwitch(accDataClauseOp) +mlir::Value mlir::acc::getVar(mlir::Operation *accDataClauseOp) { + auto varPtr{ + llvm::TypeSwitch(accDataClauseOp) + .Case([&](auto entry) { return entry.getVar(); }) + .Default([&](mlir::Operation *) { return mlir::Value(); })}; + return varPtr; +} + +mlir::Type mlir::acc::getVarType(mlir::Operation *accDataClauseOp) { + auto varType{llvm::TypeSwitch(accDataClauseOp) + .Case( + [&](auto entry) { return entry.getVarType(); }) + .Case( + [&](auto exit) { return exit.getVarType(); }) + .Default([&](mlir::Operation *) { return mlir::Type(); })}; + return varType; +} + +mlir::TypedValue +mlir::acc::getAccPtr(mlir::Operation *accDataClauseOp) { + auto accPtr{llvm::TypeSwitch>( + accDataClauseOp) .Case( [&](auto dataClause) { return dataClause.getAccPtr(); }) + .Default([&](mlir::Operation *) { + return mlir::TypedValue(); + })}; + return accPtr; +} + +mlir::Value mlir::acc::getAccVar(mlir::Operation *accDataClauseOp) { + auto accPtr{llvm::TypeSwitch(accDataClauseOp) + .Case( + [&](auto dataClause) { return dataClause.getAccVar(); }) .Default([&](mlir::Operation *) { return mlir::Value(); })}; return accPtr; } diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp index fbdada9309d32..cfb8aa767b6f8 100644 --- a/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp +++ b/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp @@ -9,9 +9,11 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Diagnostics.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/OwningOpRef.h" +#include "mlir/IR/Value.h" #include "gtest/gtest.h" using namespace mlir; @@ -446,10 +448,12 @@ void testShortDataEntryOpBuilders(OpBuilder &b, MLIRContext &context, OwningOpRef varPtrOp = b.create(loc, memrefTy); - OwningOpRef op = b.create(loc, varPtrOp->getResult(), + TypedValue varPtr = + cast>(varPtrOp->getResult()); + OwningOpRef op = b.create(loc, varPtr, /*structured=*/true, /*implicit=*/true); - EXPECT_EQ(op->getVarPtr(), varPtrOp->getResult()); + EXPECT_EQ(op->getVarPtr(), varPtr); EXPECT_EQ(op->getType(), memrefTy); EXPECT_EQ(op->getDataClause(), dataClause); EXPECT_TRUE(op->getImplicit()); @@ -457,7 +461,7 @@ void testShortDataEntryOpBuilders(OpBuilder &b, MLIRContext &context, EXPECT_TRUE(op->getBounds().empty()); EXPECT_FALSE(op->getVarPtrPtr()); - OwningOpRef op2 = b.create(loc, varPtrOp->getResult(), + OwningOpRef op2 = b.create(loc, varPtr, /*structured=*/false, /*implicit=*/false); EXPECT_FALSE(op2->getImplicit()); EXPECT_FALSE(op2->getStructured()); @@ -467,13 +471,13 @@ void testShortDataEntryOpBuilders(OpBuilder &b, MLIRContext &context, OwningOpRef bounds = b.create(loc, extent->getResult()); OwningOpRef opWithBounds = - b.create(loc, varPtrOp->getResult(), + b.create(loc, varPtr, /*structured=*/true, /*implicit=*/true, bounds->getResult()); EXPECT_FALSE(opWithBounds->getBounds().empty()); EXPECT_EQ(opWithBounds->getBounds().back(), bounds->getResult()); OwningOpRef opWithName = - b.create(loc, varPtrOp->getResult(), + b.create(loc, varPtr, /*structured=*/true, /*implicit=*/true, "varName"); EXPECT_EQ(opWithName->getNameAttr().str(), "varName"); } @@ -516,23 +520,26 @@ void testShortDataExitOpBuilders(OpBuilder &b, MLIRContext &context, auto memrefTy = MemRefType::get({}, b.getI32Type()); OwningOpRef varPtrOp = b.create(loc, memrefTy); + TypedValue varPtr = + cast>(varPtrOp->getResult()); + OwningOpRef accPtrOp = b.create( - loc, varPtrOp->getResult(), /*structured=*/true, /*implicit=*/true); + loc, varPtr, /*structured=*/true, /*implicit=*/true); + TypedValue accPtr = + cast>(accPtrOp->getResult()); - OwningOpRef op = - b.create(loc, accPtrOp->getResult(), varPtrOp->getResult(), - /*structured=*/true, /*implicit=*/true); + OwningOpRef op = b.create(loc, accPtr, varPtr, + /*structured=*/true, /*implicit=*/true); - EXPECT_EQ(op->getVarPtr(), varPtrOp->getResult()); - EXPECT_EQ(op->getAccPtr(), accPtrOp->getResult()); + EXPECT_EQ(op->getVarPtr(), varPtr); + EXPECT_EQ(op->getAccPtr(), accPtr); EXPECT_EQ(op->getDataClause(), dataClause); EXPECT_TRUE(op->getImplicit()); EXPECT_TRUE(op->getStructured()); EXPECT_TRUE(op->getBounds().empty()); - OwningOpRef op2 = - b.create(loc, accPtrOp->getResult(), varPtrOp->getResult(), - /*structured=*/false, /*implicit=*/false); + OwningOpRef op2 = b.create(loc, accPtr, varPtr, + /*structured=*/false, /*implicit=*/false); EXPECT_FALSE(op2->getImplicit()); EXPECT_FALSE(op2->getStructured()); @@ -541,13 +548,13 @@ void testShortDataExitOpBuilders(OpBuilder &b, MLIRContext &context, OwningOpRef bounds = b.create(loc, extent->getResult()); OwningOpRef opWithBounds = - b.create(loc, accPtrOp->getResult(), varPtrOp->getResult(), + b.create(loc, accPtr, varPtr, /*structured=*/true, /*implicit=*/true, bounds->getResult()); EXPECT_FALSE(opWithBounds->getBounds().empty()); EXPECT_EQ(opWithBounds->getBounds().back(), bounds->getResult()); OwningOpRef opWithName = - b.create(loc, accPtrOp->getResult(), varPtrOp->getResult(), + b.create(loc, accPtr, varPtr, /*structured=*/true, /*implicit=*/true, "varName"); EXPECT_EQ(opWithName->getNameAttr().str(), "varName"); } @@ -565,19 +572,24 @@ void testShortDataExitNoVarPtrOpBuilders(OpBuilder &b, MLIRContext &context, auto memrefTy = MemRefType::get({}, b.getI32Type()); OwningOpRef varPtrOp = b.create(loc, memrefTy); + TypedValue varPtr = + cast>(varPtrOp->getResult()); + OwningOpRef accPtrOp = b.create( - loc, varPtrOp->getResult(), /*structured=*/true, /*implicit=*/true); + loc, varPtr, /*structured=*/true, /*implicit=*/true); + TypedValue accPtr = + cast>(accPtrOp->getResult()); - OwningOpRef op = b.create(loc, accPtrOp->getResult(), + OwningOpRef op = b.create(loc, accPtr, /*structured=*/true, /*implicit=*/true); - EXPECT_EQ(op->getAccPtr(), accPtrOp->getResult()); + EXPECT_EQ(op->getAccPtr(), accPtr); EXPECT_EQ(op->getDataClause(), dataClause); EXPECT_TRUE(op->getImplicit()); EXPECT_TRUE(op->getStructured()); EXPECT_TRUE(op->getBounds().empty()); - OwningOpRef op2 = b.create(loc, accPtrOp->getResult(), + OwningOpRef op2 = b.create(loc, accPtr, /*structured=*/false, /*implicit=*/false); EXPECT_FALSE(op2->getImplicit()); EXPECT_FALSE(op2->getStructured()); @@ -587,13 +599,13 @@ void testShortDataExitNoVarPtrOpBuilders(OpBuilder &b, MLIRContext &context, OwningOpRef bounds = b.create(loc, extent->getResult()); OwningOpRef opWithBounds = - b.create(loc, accPtrOp->getResult(), + b.create(loc, accPtr, /*structured=*/true, /*implicit=*/true, bounds->getResult()); EXPECT_FALSE(opWithBounds->getBounds().empty()); EXPECT_EQ(opWithBounds->getBounds().back(), bounds->getResult()); OwningOpRef opWithName = - b.create(loc, accPtrOp->getResult(), + b.create(loc, accPtr, /*structured=*/true, /*implicit=*/true, "varName"); EXPECT_EQ(opWithName->getNameAttr().str(), "varName"); } @@ -604,3 +616,75 @@ TEST_F(OpenACCOpsTest, shortDataExitOpNoVarPtrBuilder) { testShortDataExitNoVarPtrOpBuilders(b, context, loc, DataClause::acc_detach); } + +template +void testShortDataEntryOpBuildersMappableVar(OpBuilder &b, MLIRContext &context, + Location loc, + DataClause dataClause) { + auto int64Ty = b.getI64Type(); + auto memrefTy = MemRefType::get({}, int64Ty); + OwningOpRef varPtrOp = + b.create(loc, memrefTy); + SmallVector indices; + OwningOpRef loadVarOp = + b.create(loc, int64Ty, varPtrOp->getResult(), indices); + + EXPECT_TRUE(isMappableType(loadVarOp->getResult().getType())); + TypedValue var = + cast>(loadVarOp->getResult()); + OwningOpRef op = b.create(loc, var, + /*structured=*/true, /*implicit=*/true); + + EXPECT_EQ(op->getVar(), var); + EXPECT_EQ(op->getVarPtr(), nullptr); + EXPECT_EQ(op->getType(), int64Ty); + EXPECT_EQ(op->getVarType(), int64Ty); + EXPECT_EQ(op->getDataClause(), dataClause); + EXPECT_TRUE(op->getImplicit()); + EXPECT_TRUE(op->getStructured()); + EXPECT_TRUE(op->getBounds().empty()); + EXPECT_FALSE(op->getVarPtrPtr()); +} + +struct IntegerOpenACCMappableModel + : public mlir::acc::MappableType::ExternalModel {}; + +TEST_F(OpenACCOpsTest, mappableTypeBuilderDataEntry) { + // First, set up the test by attaching MappableInterface to IntegerType. + IntegerType i64ty = IntegerType::get(&context, 8); + ASSERT_FALSE(isMappableType(i64ty)); + IntegerType::attachInterface(context); + ASSERT_TRUE(isMappableType(i64ty)); + + testShortDataEntryOpBuildersMappableVar(b, context, loc, + DataClause::acc_private); + testShortDataEntryOpBuildersMappableVar( + b, context, loc, DataClause::acc_firstprivate); + testShortDataEntryOpBuildersMappableVar( + b, context, loc, DataClause::acc_reduction); + testShortDataEntryOpBuildersMappableVar( + b, context, loc, DataClause::acc_deviceptr); + testShortDataEntryOpBuildersMappableVar(b, context, loc, + DataClause::acc_present); + testShortDataEntryOpBuildersMappableVar(b, context, loc, + DataClause::acc_copyin); + testShortDataEntryOpBuildersMappableVar(b, context, loc, + DataClause::acc_create); + testShortDataEntryOpBuildersMappableVar( + b, context, loc, DataClause::acc_no_create); + testShortDataEntryOpBuildersMappableVar(b, context, loc, + DataClause::acc_attach); + testShortDataEntryOpBuildersMappableVar( + b, context, loc, DataClause::acc_getdeviceptr); + testShortDataEntryOpBuildersMappableVar( + b, context, loc, DataClause::acc_update_device); + testShortDataEntryOpBuildersMappableVar( + b, context, loc, DataClause::acc_use_device); + testShortDataEntryOpBuildersMappableVar( + b, context, loc, DataClause::acc_declare_device_resident); + testShortDataEntryOpBuildersMappableVar( + b, context, loc, DataClause::acc_declare_link); + testShortDataEntryOpBuildersMappableVar(b, context, loc, + DataClause::acc_cache); +} From d5145715f7e0866fe6526f9f09558dd698344d52 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 13:31:02 -0500 Subject: [PATCH 25/79] [RISCV][VLOPT] Add vfirst and vcpop to getOperandInfo (#122295) --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 4 ++ .../test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 60 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 9a0938bc38dd4..2c04dd062670f 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -488,6 +488,10 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { case RISCV::VFCVT_F_X_V: // Vector Floating-Point Merge Instruction case RISCV::VFMERGE_VFM: + // Vector count population in mask vcpop.m + // vfirst find-first-set mask bit + case RISCV::VCPOP_M: + case RISCV::VFIRST_M: return MILog2SEW; // Vector Widening Integer Add/Subtract diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir index 389a7ff1fdbc3..fe0929a6f8745 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir @@ -1438,3 +1438,63 @@ body: | %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 5 /* e32 */, 0 %y:vr = PseudoVFREDMAX_VS_MF2_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0 ... +--- +name: vfirst_v +body: | + bb.0: + ; CHECK-LABEL: name: vfirst_v + ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, 1, 0 /* e8 */ + ; CHECK-NEXT: %y:gpr = PseudoVFIRST_M_B8 %x, 1, 0 /* e8 */ + %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 + %y:gpr = PseudoVFIRST_M_B8 %x, 1, 0 +... +--- +name: vfirst_v_incompatible_eew +body: | + bb.0: + ; CHECK-LABEL: name: vfirst_v_incompatible_eew + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:gpr = PseudoVFIRST_M_B8 %x, 1, 0 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + %y:gpr = PseudoVFIRST_M_B8 %x, 1, 0 +... +--- +name: vfirst_v_incompatible_emul +body: | + bb.0: + ; CHECK-LABEL: name: vfirst_v_incompatible_emul + ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 /* e8 */ + ; CHECK-NEXT: %y:gpr = PseudoVFIRST_M_B16 %x, 1, 0 /* e8 */ + %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 + %y:gpr = PseudoVFIRST_M_B16 %x, 1, 0 +... +--- +name: vcpop_v +body: | + bb.0: + ; CHECK-LABEL: name: vcpop_v + ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, 1, 0 /* e8 */ + ; CHECK-NEXT: %y:gpr = PseudoVCPOP_M_B8 %x, 1, 0 /* e8 */ + %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 + %y:gpr = PseudoVCPOP_M_B8 %x, 1, 0 +... +--- +name: vcopop_v_incompatible_eew +body: | + bb.0: + ; CHECK-LABEL: name: vcopop_v_incompatible_eew + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:gpr = PseudoVCPOP_M_B8 %x, 1, 0 /* e8 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 + %y:gpr = PseudoVCPOP_M_B8 %x, 1, 0 +... +--- +name: vcpop_v_incompaitble_emul +body: | + bb.0: + ; CHECK-LABEL: name: vcpop_v_incompaitble_emul + ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 /* e8 */ + ; CHECK-NEXT: %y:gpr = PseudoVCPOP_M_B16 %x, 1, 0 /* e8 */ + %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 + %y:gpr = PseudoVCPOP_M_B16 %x, 1, 0 +... From 8e12037d38e2a9a1cfc6402be2b33283e3220bcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Thu, 9 Jan 2025 19:37:24 +0100 Subject: [PATCH 26/79] [flang] Fix finding system install of LLVM/Clang/MLIR in standalone builds (#120914) The changes in #87822 introduced a regression where Flang could no longer be built standalone without explicitly specifying all of LLVM_DIR, CLANG_DIR and MLIR_DIR. Restore the earlier logic that used these paths as hints, and supported finding system-wide LLVM install via default paths. Instead, make paths absolute after locating the packages, using the paths CMake determined. ----- @vzakhari, could you confirm that this doesn't break your use case? --- flang/CMakeLists.txt | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 68947eaa9c9bd..34b3fedc88494 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -89,13 +89,16 @@ if (FLANG_STANDALONE_BUILD) mark_as_advanced(LLVM_ENABLE_ASSERTIONS) endif() + # We need a pre-built/installed version of LLVM. + find_package(LLVM REQUIRED HINTS "${LLVM_DIR}") # If the user specifies a relative path to LLVM_DIR, the calls to include # LLVM modules fail. Append the absolute path to LLVM_DIR instead. get_filename_component(LLVM_DIR_ABSOLUTE ${LLVM_DIR} REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}) list(APPEND CMAKE_MODULE_PATH ${LLVM_DIR_ABSOLUTE}) - # We need a pre-built/installed version of LLVM. - find_package(LLVM REQUIRED HINTS "${LLVM_DIR_ABSOLUTE}") + + # TODO: Remove when libclangDriver is lifted out of Clang + find_package(Clang REQUIRED PATHS "${CLANG_DIR}") # Users might specify a path to CLANG_DIR that's: # * a full path, or @@ -104,17 +107,11 @@ if (FLANG_STANDALONE_BUILD) # cases. get_filename_component( CLANG_DIR_ABSOLUTE - ${CLANG_DIR} + ${Clang_DIR} REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}) list(APPEND CMAKE_MODULE_PATH ${CLANG_DIR_ABSOLUTE}) - # TODO: Remove when libclangDriver is lifted out of Clang - find_package(Clang REQUIRED PATHS "${CLANG_DIR_ABSOLUTE}" NO_DEFAULT_PATH) - if (NOT Clang_FOUND) - message(FATAL_ERROR "Failed to find Clang") - endif() - # If LLVM links to zlib we need the imported targets so we can too. if(LLVM_ENABLE_ZLIB) find_package(ZLIB REQUIRED) @@ -132,12 +129,12 @@ if (FLANG_STANDALONE_BUILD) include(AddClang) include(TableGen) + find_package(MLIR REQUIRED CONFIG HINTS ${MLIR_DIR}) # If the user specifies a relative path to MLIR_DIR, the calls to include # MLIR modules fail. Append the absolute path to MLIR_DIR instead. get_filename_component(MLIR_DIR_ABSOLUTE ${MLIR_DIR} REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}) list(APPEND CMAKE_MODULE_PATH ${MLIR_DIR_ABSOLUTE}) - find_package(MLIR REQUIRED CONFIG HINTS ${MLIR_DIR_ABSOLUTE}) # Use SYSTEM for the same reasons as for LLVM includes include_directories(SYSTEM ${MLIR_INCLUDE_DIRS}) include(AddMLIR) From 3ba46ddb7994c4fa63d3d34166b6f3dedb07a184 Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Thu, 9 Jan 2025 13:38:28 -0500 Subject: [PATCH 27/79] [AMDGPU][True16][MC][NFC] add true16/fake16 test for gfx12 vop3c/3cx (#122316) This is a NFC. Duplicate mc test file for gfx12 vop3c/vop3cx to true16/fake16 mode and update it with +real-true16/-real-true16 flag. This is for the upcoming true16 changes --- .../MC/AMDGPU/gfx12_asm_vop3c_dpp16-fake16.s | 6647 +++++++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s | 8 +- .../MC/AMDGPU/gfx12_asm_vop3c_dpp8-fake16.s | 2975 ++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s | 8 +- llvm/test/MC/AMDGPU/gfx12_asm_vop3cx-fake16.s | 3414 +++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s | 4 +- .../MC/AMDGPU/gfx12_asm_vop3cx_dpp16-fake16.s | 2595 +++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s | 4 +- .../MC/AMDGPU/gfx12_asm_vop3cx_dpp8-fake16.s | 897 +++ llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s | 4 +- 10 files changed, 16542 insertions(+), 14 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3cx-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8-fake16.s diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16-fake16.s new file mode 100644 index 0000000000000..ebf537724393f --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16-fake16.s @@ -0,0 +1,6647 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_class_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_class_f16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30] + +v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_class_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_class_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_class_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_class_f32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_class_f32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_class_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_class_f32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_class_f32_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x01,0x7e,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30] + +v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_eq_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_eq_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_eq_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x12,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_eq_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x12,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_eq_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_eq_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x12,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_eq_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x12,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_eq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x12,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_eq_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_eq_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_eq_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_eq_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_eq_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_eq_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_eq_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_eq_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_eq_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_eq_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_eq_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_eq_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_eq_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_eq_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_eq_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_eq_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x42,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_eq_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_eq_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_eq_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_eq_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_eq_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_eq_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_eq_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_eq_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_eq_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_eq_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_eq_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_eq_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_eq_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_eq_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_eq_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4a,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ge_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ge_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ge_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ge_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x16,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ge_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x16,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ge_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ge_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x16,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ge_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x16,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x16,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_ge_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ge_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ge_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ge_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ge_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ge_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ge_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_ge_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ge_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ge_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ge_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ge_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ge_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ge_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ge_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ge_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x46,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_ge_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ge_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ge_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ge_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ge_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ge_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ge_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_ge_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ge_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ge_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ge_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ge_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ge_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ge_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ge_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4e,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_gt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_gt_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_gt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_gt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x14,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_gt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x14,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_gt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_gt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x14,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_gt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x14,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_gt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x14,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_gt_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_gt_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_gt_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_gt_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_gt_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_gt_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_gt_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_gt_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_gt_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_gt_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_gt_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_gt_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_gt_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_gt_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_gt_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_gt_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x44,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_gt_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_gt_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_gt_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_gt_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_gt_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_gt_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_gt_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_gt_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_gt_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_gt_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_gt_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_gt_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_gt_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_gt_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_gt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4c,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_le_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_le_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_le_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_le_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_le_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x13,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_le_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x13,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_le_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_le_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x13,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_le_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x13,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_le_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x13,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_le_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_le_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_le_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_le_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_le_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_le_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_le_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_le_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_le_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_le_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_le_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_le_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_le_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_le_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_le_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_le_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_le_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_le_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x43,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_le_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_le_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_le_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_le_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_le_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_le_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_le_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_le_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_le_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_le_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_le_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_le_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_le_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_le_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_le_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_le_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_le_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4b,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_lg_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_lg_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_lg_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_lg_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_lg_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_lg_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x15,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_lg_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x15,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_lg_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_lg_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x15,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_lg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x15,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_lg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x15,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_lt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_lt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x01,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_lt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x01,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_lt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_lt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x01,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_lt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x01,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_lt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x01,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_lt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_lt_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_lt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_lt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x11,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_lt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x11,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_lt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_lt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x11,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_lt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x11,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_lt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x11,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_lt_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_lt_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_lt_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_lt_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_lt_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_lt_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_lt_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_lt_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_lt_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_lt_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_lt_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_lt_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_lt_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_lt_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_lt_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_lt_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x41,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_lt_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_lt_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_lt_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_lt_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_lt_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_lt_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_lt_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_lt_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_lt_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_lt_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_lt_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_lt_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_lt_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_lt_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_lt_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_lt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x49,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_ne_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ne_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ne_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ne_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ne_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ne_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ne_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_ne_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ne_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ne_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ne_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ne_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ne_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ne_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ne_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ne_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x45,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_ne_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ne_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ne_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ne_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ne_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ne_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ne_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_ne_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ne_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ne_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ne_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ne_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ne_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ne_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ne_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4d,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_neq_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_neq_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_neq_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_neq_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_neq_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_neq_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x1d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_neq_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x1d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_neq_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_neq_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x1d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x1d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_nge_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_nge_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_nge_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_nge_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_nge_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_nge_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x19,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_nge_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x19,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_nge_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_nge_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x19,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x19,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x19,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ngt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_ngt_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_ngt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_ngt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x1b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_ngt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x1b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_ngt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_ngt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x1b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x1b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_nle_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_nle_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_nle_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_nle_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_nle_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_nle_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x1c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_nle_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x1c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_nle_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_nle_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x1c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x1c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_nlg_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_nlg_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_nlg_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_nlg_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x1a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_nlg_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x1a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_nlg_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_nlg_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x1a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x1a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_nlt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_nlt_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_nlt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_nlt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x1e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_nlt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x1e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_nlt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_nlt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x1e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x1e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_o_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_o_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_o_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_o_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_o_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_o_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x17,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_o_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x17,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_o_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_o_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x17,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x17,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x17,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_u_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: v_cmp_u_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: v_cmp_u_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] +// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, v2 row_mirror +// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, v2 row_half_mirror +// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, v2 row_shl:1 +// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, v2 row_shl:15 +// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, v2 row_shr:1 +// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, v2 row_shr:15 +// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, v2 row_ror:1 +// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s105, v1, v2 row_ror:15 +// W32: v_cmp_u_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cmp_u_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cmp_u_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x18,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cmp_u_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x18,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_mirror +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_half_mirror +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shl:1 +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shl:15 +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shr:1 +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shr:15 +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_ror:1 +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_ror:15 +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cmp_u_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cmp_u_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x18,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cmp_u_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x18,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmp_u_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x18,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s index a508cbd661415..98462309b2a3b 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s @@ -1,8 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8-fake16.s new file mode 100644 index 0000000000000..5bdc27ce1974a --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8-fake16.s @@ -0,0 +1,2975 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_class_f16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x01,0x7d,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] + +v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_class_f32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x7e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x7e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_class_f32_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x01,0x7e,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] + +v_cmp_eq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x02,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_eq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x12,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_eq_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x12,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x12,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_eq_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x12,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_eq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x12,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_eq_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_eq_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x32,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x32,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x32,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_eq_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x32,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_eq_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_eq_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x42,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x42,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_eq_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x42,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x42,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x42,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_eq_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x42,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_eq_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x42,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_eq_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_eq_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x3a,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_eq_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x3a,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_eq_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_eq_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_eq_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x4a,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x4a,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_eq_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4a,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_ge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x06,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_ge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x16,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ge_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x16,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x16,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x16,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x16,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_ge_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ge_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x36,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x36,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x36,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ge_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x36,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ge_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_ge_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ge_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x46,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x46,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x46,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ge_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x46,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ge_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x46,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_ge_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ge_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x3e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ge_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x3e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ge_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_ge_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ge_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x4e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x4e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ge_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4e,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_gt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x04,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_gt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x14,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_gt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x14,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x14,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_gt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x14,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_gt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x14,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_gt_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_gt_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x34,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x34,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x34,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_gt_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x34,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_gt_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_gt_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_gt_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x44,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x44,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x44,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_gt_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x44,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_gt_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x44,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_gt_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_gt_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x3c,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_gt_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x3c,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_gt_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_gt_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_gt_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x4c,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x4c,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_gt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4c,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_le_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x03,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_le_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x13,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_le_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x13,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x13,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_le_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x13,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_le_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x13,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_le_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_le_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x33,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x33,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x33,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_le_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x33,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_le_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_le_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x43,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x43,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_le_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x43,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x43,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x43,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_le_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x43,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_le_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x43,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_le_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_le_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x3b,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_le_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x3b,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_le_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_le_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_le_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x4b,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x4b,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_le_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4b,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_lg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x05,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_lg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x15,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_lg_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x15,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x15,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_lg_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x15,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_lg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x15,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_lt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x01,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_lt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x01,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x01,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x01,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x01,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_lt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x01,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_lt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x01,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_lt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x11,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_lt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x11,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x11,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x11,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x11,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_lt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x11,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_lt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x11,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_lt_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_lt_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x31,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_lt_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x31,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_lt_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_lt_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_lt_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x41,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x41,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x41,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_lt_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x41,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_lt_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x41,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_lt_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_lt_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x39,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x39,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x39,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_lt_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x39,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_lt_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_lt_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_lt_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x49,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x49,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x49,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_lt_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x49,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_lt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x49,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_ne_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ne_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x35,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x35,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x35,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ne_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x35,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ne_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_ne_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ne_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x45,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x45,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ne_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x45,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ne_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x45,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_ne_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ne_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x3d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ne_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x3d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ne_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_ne_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ne_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x4d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x4d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ne_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4d,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmp_neq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_neq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x1d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_neq_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x1d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x1d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_nge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x09,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_nge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x19,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_nge_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x19,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x19,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x19,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x19,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_ngt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_ngt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x1b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_ngt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x1b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x1b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_nle_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_nle_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x1c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_nle_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x1c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x1c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_nlg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_nlg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x1a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_nlg_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x1a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x1a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_nlt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_nlt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x1e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_nlt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x1e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x1e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_o_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x07,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_o_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x17,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_o_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x17,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x17,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x17,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x17,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_u_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x08,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmp_u_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x18,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cmp_u_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x18,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x18,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cmp_u_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x18,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmp_u_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x18,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s index 96a3d356ae282..4617914b502b3 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s @@ -1,8 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx-fake16.s new file mode 100644 index 0000000000000..a76830385004b --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx-fake16.s @@ -0,0 +1,3414 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +v_cmpx_class_f16_e64 v1, v2 +// GFX12: v_cmpx_class_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_class_f16_e64 v255, v2 +// GFX12: v_cmpx_class_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0xff,0x05,0x02,0x00] + +v_cmpx_class_f16_e64 s1, v2 +// GFX12: v_cmpx_class_f16_e64 s1, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x04,0x02,0x00] + +v_cmpx_class_f16_e64 s105, v255 +// GFX12: v_cmpx_class_f16_e64 s105, v255 ; encoding: [0x7e,0x00,0xfd,0xd4,0x69,0xfe,0x03,0x00] + +v_cmpx_class_f16_e64 vcc_lo, s2 +// GFX12: v_cmpx_class_f16_e64 vcc_lo, s2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x6a,0x04,0x00,0x00] + +v_cmpx_class_f16_e64 vcc_hi, s105 +// GFX12: v_cmpx_class_f16_e64 vcc_hi, s105 ; encoding: [0x7e,0x00,0xfd,0xd4,0x6b,0xd2,0x00,0x00] + +v_cmpx_class_f16_e64 ttmp15, ttmp15 +// GFX12: v_cmpx_class_f16_e64 ttmp15, ttmp15 ; encoding: [0x7e,0x00,0xfd,0xd4,0x7b,0xf6,0x00,0x00] + +v_cmpx_class_f16_e64 m0, src_scc +// GFX12: v_cmpx_class_f16_e64 m0, src_scc ; encoding: [0x7e,0x00,0xfd,0xd4,0x7d,0xfa,0x01,0x00] + +v_cmpx_class_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_class_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xfd,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_class_f16_e64 exec_hi, null +// GFX12: v_cmpx_class_f16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xfd,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_class_f16_e64 null, exec_lo +// GFX12: v_cmpx_class_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xfd,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_class_f16_e64 -1, exec_hi +// GFX12: v_cmpx_class_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xfd,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_class_f16_e64 0.5, m0 +// GFX12: v_cmpx_class_f16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xfd,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_class_f16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_class_f16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xfd,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_class_f16_e64 -|0xfe0b|, vcc_hi +// GFX12: v_cmpx_class_f16_e64 -|0xfe0b|, vcc_hi ; encoding: [0x7e,0x01,0xfd,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cmpx_class_f32_e64 v1, v2 +// GFX12: v_cmpx_class_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_class_f32_e64 v255, v255 +// GFX12: v_cmpx_class_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0xfe,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_class_f32_e64 s1, s2 +// GFX12: v_cmpx_class_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_class_f32_e64 s105, s105 +// GFX12: v_cmpx_class_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0xfe,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_class_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_class_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xfe,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_class_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_class_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xfe,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_class_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_class_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xfe,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_class_f32_e64 m0, 0.5 +// GFX12: v_cmpx_class_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xfe,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_class_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_class_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xfe,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_class_f32_e64 exec_hi, null +// GFX12: v_cmpx_class_f32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xfe,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_class_f32_e64 null, exec_lo +// GFX12: v_cmpx_class_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xfe,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_class_f32_e64 -1, exec_hi +// GFX12: v_cmpx_class_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xfe,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_class_f32_e64 0.5, m0 +// GFX12: v_cmpx_class_f32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xfe,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_class_f32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_class_f32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xfe,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_class_f32_e64 -|0xaf123456|, vcc_hi +// GFX12: v_cmpx_class_f32_e64 -|0xaf123456|, vcc_hi ; encoding: [0x7e,0x01,0xfe,0xd4,0xff,0xd6,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cmpx_class_f64_e64 v[1:2], v2 +// GFX12: v_cmpx_class_f64_e64 v[1:2], v2 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_class_f64_e64 v[1:2], v255 +// GFX12: v_cmpx_class_f64_e64 v[1:2], v255 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xff,0x03,0x00] + +v_cmpx_class_f64_e64 v[1:2], s2 +// GFX12: v_cmpx_class_f64_e64 v[1:2], s2 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0x05,0x00,0x00] + +v_cmpx_class_f64_e64 v[1:2], s105 +// GFX12: v_cmpx_class_f64_e64 v[1:2], s105 ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xd3,0x00,0x00] + +v_cmpx_class_f64_e64 v[254:255], ttmp15 +// GFX12: v_cmpx_class_f64_e64 v[254:255], ttmp15 ; encoding: [0x7e,0x00,0xff,0xd4,0xfe,0xf7,0x00,0x00] + +v_cmpx_class_f64_e64 s[2:3], vcc_hi +// GFX12: v_cmpx_class_f64_e64 s[2:3], vcc_hi ; encoding: [0x7e,0x00,0xff,0xd4,0x02,0xd6,0x00,0x00] + +v_cmpx_class_f64_e64 s[104:105], vcc_lo +// GFX12: v_cmpx_class_f64_e64 s[104:105], vcc_lo ; encoding: [0x7e,0x00,0xff,0xd4,0x68,0xd4,0x00,0x00] + +v_cmpx_class_f64_e64 vcc, m0 +// GFX12: v_cmpx_class_f64_e64 vcc, m0 ; encoding: [0x7e,0x00,0xff,0xd4,0x6a,0xfa,0x00,0x00] + +v_cmpx_class_f64_e64 ttmp[14:15], exec_hi +// GFX12: v_cmpx_class_f64_e64 ttmp[14:15], exec_hi ; encoding: [0x7e,0x00,0xff,0xd4,0x7a,0xfe,0x00,0x00] + +v_cmpx_class_f64_e64 exec, exec_lo +// GFX12: v_cmpx_class_f64_e64 exec, exec_lo ; encoding: [0x7e,0x00,0xff,0xd4,0x7e,0xfc,0x00,0x00] + +v_cmpx_class_f64_e64 null, null +// GFX12: v_cmpx_class_f64_e64 null, null ; encoding: [0x7e,0x00,0xff,0xd4,0x7c,0xf8,0x00,0x00] + +v_cmpx_class_f64_e64 -1, -1 +// GFX12: v_cmpx_class_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xff,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_class_f64_e64 0.5, 0.5 +// GFX12: v_cmpx_class_f64_e64 0.5, 0.5 ; encoding: [0x7e,0x00,0xff,0xd4,0xf0,0xe0,0x01,0x00] + +v_cmpx_class_f64_e64 -|src_scc|, src_scc +// GFX12: v_cmpx_class_f64_e64 -|src_scc|, src_scc ; encoding: [0x7e,0x01,0xff,0xd4,0xfd,0xfa,0x01,0x20] + +v_cmpx_class_f64_e64 0xaf123456, 0xaf123456 +// GFX12: v_cmpx_class_f64_e64 0xaf123456, 0xaf123456 ; encoding: [0x7e,0x00,0xff,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_f16_e64 v1, v2 +// GFX12: v_cmpx_eq_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_eq_f16_e64 v255, v255 +// GFX12: v_cmpx_eq_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x82,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_eq_f16_e64 s1, s2 +// GFX12: v_cmpx_eq_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_eq_f16_e64 s105, s105 +// GFX12: v_cmpx_eq_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x82,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_eq_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_eq_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x82,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_eq_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_eq_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x82,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_eq_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x82,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_eq_f16_e64 m0, 0.5 +// GFX12: v_cmpx_eq_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x82,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_eq_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_eq_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x82,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_eq_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_eq_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x82,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_eq_f16_e64 null, exec_lo +// GFX12: v_cmpx_eq_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x82,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_eq_f16_e64 -1, exec_hi +// GFX12: v_cmpx_eq_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x82,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_eq_f16_e64 0.5, -m0 +// GFX12: v_cmpx_eq_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x82,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_eq_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_eq_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x82,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_eq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_eq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x82,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_f32_e64 v1, v2 +// GFX12: v_cmpx_eq_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_eq_f32_e64 v255, v255 +// GFX12: v_cmpx_eq_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x92,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_eq_f32_e64 s1, s2 +// GFX12: v_cmpx_eq_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_eq_f32_e64 s105, s105 +// GFX12: v_cmpx_eq_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x92,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_eq_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_eq_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x92,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_eq_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_eq_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x92,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_eq_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x92,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_eq_f32_e64 m0, 0.5 +// GFX12: v_cmpx_eq_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x92,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_eq_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_eq_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x92,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_eq_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_eq_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x92,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_eq_f32_e64 null, exec_lo +// GFX12: v_cmpx_eq_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x92,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_eq_f32_e64 -1, exec_hi +// GFX12: v_cmpx_eq_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x92,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_eq_f32_e64 0.5, -m0 +// GFX12: v_cmpx_eq_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x92,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_eq_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_eq_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x92,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_eq_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_eq_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x92,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_eq_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_eq_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_eq_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa2,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_eq_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_eq_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xa2,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_eq_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_eq_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa2,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_eq_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_eq_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xa2,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_eq_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_eq_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa2,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_eq_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xa2,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_eq_f64_e64 null, 0.5 +// GFX12: v_cmpx_eq_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xa2,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_eq_f64_e64 -1, -1 +// GFX12: v_cmpx_eq_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xa2,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_eq_f64_e64 0.5, null +// GFX12: v_cmpx_eq_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xa2,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_eq_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_eq_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xa2,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_eq_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_eq_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa2,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_i16_e64 v1, v2 +// GFX12: v_cmpx_eq_i16_e64 v1, v2 ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_eq_i16_e64 v255, v255 +// GFX12: v_cmpx_eq_i16_e64 v255, v255 ; encoding: [0x7e,0x00,0xb2,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_eq_i16_e64 s1, s2 +// GFX12: v_cmpx_eq_i16_e64 s1, s2 ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_eq_i16_e64 s105, s105 +// GFX12: v_cmpx_eq_i16_e64 s105, s105 ; encoding: [0x7e,0x00,0xb2,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_eq_i16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_eq_i16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xb2,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_eq_i16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_eq_i16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xb2,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_i16_e64 ttmp15, src_scc +// GFX12: v_cmpx_eq_i16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xb2,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_eq_i16_e64 m0, 0.5 +// GFX12: v_cmpx_eq_i16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xb2,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_eq_i16_e64 exec_lo, -1 +// GFX12: v_cmpx_eq_i16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xb2,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_eq_i16_e64 exec_hi, null +// GFX12: v_cmpx_eq_i16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xb2,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_eq_i16_e64 null, exec_lo +// GFX12: v_cmpx_eq_i16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xb2,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_eq_i16_e64 -1, exec_hi +// GFX12: v_cmpx_eq_i16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xb2,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_eq_i16_e64 0.5, m0 +// GFX12: v_cmpx_eq_i16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xb2,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_eq_i16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_eq_i16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xb2,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_eq_i16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_eq_i16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xb2,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_i32_e64 v1, v2 +// GFX12: v_cmpx_eq_i32_e64 v1, v2 ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_eq_i32_e64 v255, v255 +// GFX12: v_cmpx_eq_i32_e64 v255, v255 ; encoding: [0x7e,0x00,0xc2,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_eq_i32_e64 s1, s2 +// GFX12: v_cmpx_eq_i32_e64 s1, s2 ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_eq_i32_e64 s105, s105 +// GFX12: v_cmpx_eq_i32_e64 s105, s105 ; encoding: [0x7e,0x00,0xc2,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_eq_i32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_eq_i32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xc2,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_eq_i32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_eq_i32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xc2,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_i32_e64 ttmp15, src_scc +// GFX12: v_cmpx_eq_i32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xc2,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_eq_i32_e64 m0, 0.5 +// GFX12: v_cmpx_eq_i32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xc2,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_eq_i32_e64 exec_lo, -1 +// GFX12: v_cmpx_eq_i32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xc2,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_eq_i32_e64 exec_hi, null +// GFX12: v_cmpx_eq_i32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xc2,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_eq_i32_e64 null, exec_lo +// GFX12: v_cmpx_eq_i32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xc2,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_eq_i32_e64 -1, exec_hi +// GFX12: v_cmpx_eq_i32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xc2,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_eq_i32_e64 0.5, m0 +// GFX12: v_cmpx_eq_i32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xc2,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_eq_i32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_eq_i32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xc2,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_eq_i32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_eq_i32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xc2,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_i64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_eq_i64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_eq_i64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_eq_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd2,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_eq_i64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_eq_i64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xd2,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_eq_i64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_eq_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd2,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_eq_i64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_eq_i64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xd2,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_eq_i64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_eq_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd2,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_i64_e64 exec, src_scc +// GFX12: v_cmpx_eq_i64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xd2,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_eq_i64_e64 null, 0.5 +// GFX12: v_cmpx_eq_i64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xd2,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_eq_i64_e64 -1, -1 +// GFX12: v_cmpx_eq_i64_e64 -1, -1 ; encoding: [0x7e,0x00,0xd2,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_eq_i64_e64 0.5, null +// GFX12: v_cmpx_eq_i64_e64 0.5, null ; encoding: [0x7e,0x00,0xd2,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_eq_i64_e64 src_scc, exec +// GFX12: v_cmpx_eq_i64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xd2,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_eq_i64_e64 0xaf123456, vcc +// GFX12: v_cmpx_eq_i64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xd2,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_u16_e64 v1, v2 +// GFX12: v_cmpx_eq_u16_e64 v1, v2 ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_eq_u16_e64 v255, v255 +// GFX12: v_cmpx_eq_u16_e64 v255, v255 ; encoding: [0x7e,0x00,0xba,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_eq_u16_e64 s1, s2 +// GFX12: v_cmpx_eq_u16_e64 s1, s2 ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_eq_u16_e64 s105, s105 +// GFX12: v_cmpx_eq_u16_e64 s105, s105 ; encoding: [0x7e,0x00,0xba,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_eq_u16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_eq_u16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xba,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_eq_u16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_eq_u16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xba,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_u16_e64 ttmp15, src_scc +// GFX12: v_cmpx_eq_u16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xba,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_eq_u16_e64 m0, 0.5 +// GFX12: v_cmpx_eq_u16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xba,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_eq_u16_e64 exec_lo, -1 +// GFX12: v_cmpx_eq_u16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xba,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_eq_u16_e64 exec_hi, null +// GFX12: v_cmpx_eq_u16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xba,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_eq_u16_e64 null, exec_lo +// GFX12: v_cmpx_eq_u16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xba,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_eq_u16_e64 -1, exec_hi +// GFX12: v_cmpx_eq_u16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xba,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_eq_u16_e64 0.5, m0 +// GFX12: v_cmpx_eq_u16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xba,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_eq_u16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_eq_u16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xba,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_eq_u16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_eq_u16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xba,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_u32_e64 v1, v2 +// GFX12: v_cmpx_eq_u32_e64 v1, v2 ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_eq_u32_e64 v255, v255 +// GFX12: v_cmpx_eq_u32_e64 v255, v255 ; encoding: [0x7e,0x00,0xca,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_eq_u32_e64 s1, s2 +// GFX12: v_cmpx_eq_u32_e64 s1, s2 ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_eq_u32_e64 s105, s105 +// GFX12: v_cmpx_eq_u32_e64 s105, s105 ; encoding: [0x7e,0x00,0xca,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_eq_u32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_eq_u32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xca,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_eq_u32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_eq_u32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xca,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_u32_e64 ttmp15, src_scc +// GFX12: v_cmpx_eq_u32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xca,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_eq_u32_e64 m0, 0.5 +// GFX12: v_cmpx_eq_u32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xca,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_eq_u32_e64 exec_lo, -1 +// GFX12: v_cmpx_eq_u32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xca,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_eq_u32_e64 exec_hi, null +// GFX12: v_cmpx_eq_u32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xca,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_eq_u32_e64 null, exec_lo +// GFX12: v_cmpx_eq_u32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xca,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_eq_u32_e64 -1, exec_hi +// GFX12: v_cmpx_eq_u32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xca,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_eq_u32_e64 0.5, m0 +// GFX12: v_cmpx_eq_u32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xca,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_eq_u32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_eq_u32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xca,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_eq_u32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_eq_u32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xca,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_u64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_eq_u64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_eq_u64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_eq_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xda,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_eq_u64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_eq_u64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xda,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_eq_u64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_eq_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xda,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_eq_u64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_eq_u64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xda,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_eq_u64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_eq_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xda,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_u64_e64 exec, src_scc +// GFX12: v_cmpx_eq_u64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xda,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_eq_u64_e64 null, 0.5 +// GFX12: v_cmpx_eq_u64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xda,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_eq_u64_e64 -1, -1 +// GFX12: v_cmpx_eq_u64_e64 -1, -1 ; encoding: [0x7e,0x00,0xda,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_eq_u64_e64 0.5, null +// GFX12: v_cmpx_eq_u64_e64 0.5, null ; encoding: [0x7e,0x00,0xda,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_eq_u64_e64 src_scc, exec +// GFX12: v_cmpx_eq_u64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xda,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_eq_u64_e64 0xaf123456, vcc +// GFX12: v_cmpx_eq_u64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xda,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_f16_e64 v1, v2 +// GFX12: v_cmpx_ge_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ge_f16_e64 v255, v255 +// GFX12: v_cmpx_ge_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x86,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ge_f16_e64 s1, s2 +// GFX12: v_cmpx_ge_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ge_f16_e64 s105, s105 +// GFX12: v_cmpx_ge_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x86,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ge_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ge_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x86,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ge_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_ge_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x86,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_ge_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x86,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ge_f16_e64 m0, 0.5 +// GFX12: v_cmpx_ge_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x86,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ge_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_ge_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x86,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ge_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_ge_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x86,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ge_f16_e64 null, exec_lo +// GFX12: v_cmpx_ge_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x86,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ge_f16_e64 -1, exec_hi +// GFX12: v_cmpx_ge_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x86,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ge_f16_e64 0.5, -m0 +// GFX12: v_cmpx_ge_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x86,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_ge_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_ge_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x86,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_ge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_ge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x86,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_f32_e64 v1, v2 +// GFX12: v_cmpx_ge_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ge_f32_e64 v255, v255 +// GFX12: v_cmpx_ge_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x96,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ge_f32_e64 s1, s2 +// GFX12: v_cmpx_ge_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ge_f32_e64 s105, s105 +// GFX12: v_cmpx_ge_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x96,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ge_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ge_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x96,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ge_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_ge_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x96,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_ge_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x96,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ge_f32_e64 m0, 0.5 +// GFX12: v_cmpx_ge_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x96,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ge_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_ge_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x96,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ge_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_ge_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x96,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ge_f32_e64 null, exec_lo +// GFX12: v_cmpx_ge_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x96,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ge_f32_e64 -1, exec_hi +// GFX12: v_cmpx_ge_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x96,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ge_f32_e64 0.5, -m0 +// GFX12: v_cmpx_ge_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x96,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_ge_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_ge_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x96,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_ge_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_ge_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x96,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_ge_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ge_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_ge_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa6,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_ge_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_ge_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xa6,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_ge_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_ge_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa6,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_ge_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_ge_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xa6,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_ge_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_ge_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa6,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_ge_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xa6,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_ge_f64_e64 null, 0.5 +// GFX12: v_cmpx_ge_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xa6,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_ge_f64_e64 -1, -1 +// GFX12: v_cmpx_ge_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xa6,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_ge_f64_e64 0.5, null +// GFX12: v_cmpx_ge_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xa6,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_ge_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_ge_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xa6,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_ge_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_ge_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa6,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_i16_e64 v1, v2 +// GFX12: v_cmpx_ge_i16_e64 v1, v2 ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ge_i16_e64 v255, v255 +// GFX12: v_cmpx_ge_i16_e64 v255, v255 ; encoding: [0x7e,0x00,0xb6,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ge_i16_e64 s1, s2 +// GFX12: v_cmpx_ge_i16_e64 s1, s2 ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ge_i16_e64 s105, s105 +// GFX12: v_cmpx_ge_i16_e64 s105, s105 ; encoding: [0x7e,0x00,0xb6,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ge_i16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ge_i16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xb6,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ge_i16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_ge_i16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xb6,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_i16_e64 ttmp15, src_scc +// GFX12: v_cmpx_ge_i16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xb6,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ge_i16_e64 m0, 0.5 +// GFX12: v_cmpx_ge_i16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xb6,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ge_i16_e64 exec_lo, -1 +// GFX12: v_cmpx_ge_i16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xb6,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ge_i16_e64 exec_hi, null +// GFX12: v_cmpx_ge_i16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xb6,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ge_i16_e64 null, exec_lo +// GFX12: v_cmpx_ge_i16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xb6,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ge_i16_e64 -1, exec_hi +// GFX12: v_cmpx_ge_i16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xb6,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ge_i16_e64 0.5, m0 +// GFX12: v_cmpx_ge_i16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xb6,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_ge_i16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_ge_i16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xb6,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_ge_i16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_ge_i16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xb6,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_i32_e64 v1, v2 +// GFX12: v_cmpx_ge_i32_e64 v1, v2 ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ge_i32_e64 v255, v255 +// GFX12: v_cmpx_ge_i32_e64 v255, v255 ; encoding: [0x7e,0x00,0xc6,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ge_i32_e64 s1, s2 +// GFX12: v_cmpx_ge_i32_e64 s1, s2 ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ge_i32_e64 s105, s105 +// GFX12: v_cmpx_ge_i32_e64 s105, s105 ; encoding: [0x7e,0x00,0xc6,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ge_i32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ge_i32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xc6,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ge_i32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_ge_i32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xc6,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_i32_e64 ttmp15, src_scc +// GFX12: v_cmpx_ge_i32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xc6,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ge_i32_e64 m0, 0.5 +// GFX12: v_cmpx_ge_i32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xc6,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ge_i32_e64 exec_lo, -1 +// GFX12: v_cmpx_ge_i32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xc6,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ge_i32_e64 exec_hi, null +// GFX12: v_cmpx_ge_i32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xc6,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ge_i32_e64 null, exec_lo +// GFX12: v_cmpx_ge_i32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xc6,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ge_i32_e64 -1, exec_hi +// GFX12: v_cmpx_ge_i32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xc6,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ge_i32_e64 0.5, m0 +// GFX12: v_cmpx_ge_i32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xc6,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_ge_i32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_ge_i32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xc6,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_ge_i32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_ge_i32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xc6,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_i64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_ge_i64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ge_i64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_ge_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd6,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_ge_i64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_ge_i64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xd6,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_ge_i64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_ge_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd6,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_ge_i64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_ge_i64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xd6,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_ge_i64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_ge_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd6,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_i64_e64 exec, src_scc +// GFX12: v_cmpx_ge_i64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xd6,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_ge_i64_e64 null, 0.5 +// GFX12: v_cmpx_ge_i64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xd6,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_ge_i64_e64 -1, -1 +// GFX12: v_cmpx_ge_i64_e64 -1, -1 ; encoding: [0x7e,0x00,0xd6,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_ge_i64_e64 0.5, null +// GFX12: v_cmpx_ge_i64_e64 0.5, null ; encoding: [0x7e,0x00,0xd6,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_ge_i64_e64 src_scc, exec +// GFX12: v_cmpx_ge_i64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xd6,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_ge_i64_e64 0xaf123456, vcc +// GFX12: v_cmpx_ge_i64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xd6,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_u16_e64 v1, v2 +// GFX12: v_cmpx_ge_u16_e64 v1, v2 ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ge_u16_e64 v255, v255 +// GFX12: v_cmpx_ge_u16_e64 v255, v255 ; encoding: [0x7e,0x00,0xbe,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ge_u16_e64 s1, s2 +// GFX12: v_cmpx_ge_u16_e64 s1, s2 ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ge_u16_e64 s105, s105 +// GFX12: v_cmpx_ge_u16_e64 s105, s105 ; encoding: [0x7e,0x00,0xbe,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ge_u16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ge_u16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xbe,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ge_u16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_ge_u16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xbe,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_u16_e64 ttmp15, src_scc +// GFX12: v_cmpx_ge_u16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xbe,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ge_u16_e64 m0, 0.5 +// GFX12: v_cmpx_ge_u16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xbe,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ge_u16_e64 exec_lo, -1 +// GFX12: v_cmpx_ge_u16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xbe,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ge_u16_e64 exec_hi, null +// GFX12: v_cmpx_ge_u16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xbe,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ge_u16_e64 null, exec_lo +// GFX12: v_cmpx_ge_u16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xbe,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ge_u16_e64 -1, exec_hi +// GFX12: v_cmpx_ge_u16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xbe,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ge_u16_e64 0.5, m0 +// GFX12: v_cmpx_ge_u16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xbe,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_ge_u16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_ge_u16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xbe,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_ge_u16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_ge_u16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xbe,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_u32_e64 v1, v2 +// GFX12: v_cmpx_ge_u32_e64 v1, v2 ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ge_u32_e64 v255, v255 +// GFX12: v_cmpx_ge_u32_e64 v255, v255 ; encoding: [0x7e,0x00,0xce,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ge_u32_e64 s1, s2 +// GFX12: v_cmpx_ge_u32_e64 s1, s2 ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ge_u32_e64 s105, s105 +// GFX12: v_cmpx_ge_u32_e64 s105, s105 ; encoding: [0x7e,0x00,0xce,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ge_u32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ge_u32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xce,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ge_u32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_ge_u32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xce,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_u32_e64 ttmp15, src_scc +// GFX12: v_cmpx_ge_u32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xce,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ge_u32_e64 m0, 0.5 +// GFX12: v_cmpx_ge_u32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xce,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ge_u32_e64 exec_lo, -1 +// GFX12: v_cmpx_ge_u32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xce,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ge_u32_e64 exec_hi, null +// GFX12: v_cmpx_ge_u32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xce,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ge_u32_e64 null, exec_lo +// GFX12: v_cmpx_ge_u32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xce,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ge_u32_e64 -1, exec_hi +// GFX12: v_cmpx_ge_u32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xce,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ge_u32_e64 0.5, m0 +// GFX12: v_cmpx_ge_u32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xce,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_ge_u32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_ge_u32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xce,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_ge_u32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_ge_u32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xce,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_u64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_ge_u64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ge_u64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_ge_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xde,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_ge_u64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_ge_u64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xde,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_ge_u64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_ge_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xde,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_ge_u64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_ge_u64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xde,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_ge_u64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_ge_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xde,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_u64_e64 exec, src_scc +// GFX12: v_cmpx_ge_u64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xde,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_ge_u64_e64 null, 0.5 +// GFX12: v_cmpx_ge_u64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xde,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_ge_u64_e64 -1, -1 +// GFX12: v_cmpx_ge_u64_e64 -1, -1 ; encoding: [0x7e,0x00,0xde,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_ge_u64_e64 0.5, null +// GFX12: v_cmpx_ge_u64_e64 0.5, null ; encoding: [0x7e,0x00,0xde,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_ge_u64_e64 src_scc, exec +// GFX12: v_cmpx_ge_u64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xde,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_ge_u64_e64 0xaf123456, vcc +// GFX12: v_cmpx_ge_u64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xde,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_f16_e64 v1, v2 +// GFX12: v_cmpx_gt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_gt_f16_e64 v255, v255 +// GFX12: v_cmpx_gt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x84,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_gt_f16_e64 s1, s2 +// GFX12: v_cmpx_gt_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_gt_f16_e64 s105, s105 +// GFX12: v_cmpx_gt_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x84,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_gt_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_gt_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x84,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_gt_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_gt_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x84,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_gt_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x84,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_gt_f16_e64 m0, 0.5 +// GFX12: v_cmpx_gt_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x84,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_gt_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_gt_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x84,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_gt_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_gt_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x84,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_gt_f16_e64 null, exec_lo +// GFX12: v_cmpx_gt_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x84,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_gt_f16_e64 -1, exec_hi +// GFX12: v_cmpx_gt_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x84,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_gt_f16_e64 0.5, -m0 +// GFX12: v_cmpx_gt_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x84,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_gt_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_gt_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x84,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_gt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_gt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x84,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_f32_e64 v1, v2 +// GFX12: v_cmpx_gt_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_gt_f32_e64 v255, v255 +// GFX12: v_cmpx_gt_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x94,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_gt_f32_e64 s1, s2 +// GFX12: v_cmpx_gt_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_gt_f32_e64 s105, s105 +// GFX12: v_cmpx_gt_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x94,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_gt_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_gt_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x94,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_gt_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_gt_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x94,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_gt_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x94,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_gt_f32_e64 m0, 0.5 +// GFX12: v_cmpx_gt_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x94,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_gt_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_gt_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x94,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_gt_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_gt_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x94,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_gt_f32_e64 null, exec_lo +// GFX12: v_cmpx_gt_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x94,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_gt_f32_e64 -1, exec_hi +// GFX12: v_cmpx_gt_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x94,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_gt_f32_e64 0.5, -m0 +// GFX12: v_cmpx_gt_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x94,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_gt_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_gt_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x94,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_gt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_gt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x94,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_gt_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_gt_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_gt_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa4,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_gt_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_gt_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xa4,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_gt_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_gt_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa4,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_gt_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_gt_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xa4,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_gt_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_gt_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa4,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_gt_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xa4,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_gt_f64_e64 null, 0.5 +// GFX12: v_cmpx_gt_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xa4,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_gt_f64_e64 -1, -1 +// GFX12: v_cmpx_gt_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xa4,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_gt_f64_e64 0.5, null +// GFX12: v_cmpx_gt_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xa4,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_gt_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_gt_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xa4,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_gt_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_gt_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa4,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_i16_e64 v1, v2 +// GFX12: v_cmpx_gt_i16_e64 v1, v2 ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_gt_i16_e64 v255, v255 +// GFX12: v_cmpx_gt_i16_e64 v255, v255 ; encoding: [0x7e,0x00,0xb4,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_gt_i16_e64 s1, s2 +// GFX12: v_cmpx_gt_i16_e64 s1, s2 ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_gt_i16_e64 s105, s105 +// GFX12: v_cmpx_gt_i16_e64 s105, s105 ; encoding: [0x7e,0x00,0xb4,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_gt_i16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_gt_i16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xb4,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_gt_i16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_gt_i16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xb4,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_i16_e64 ttmp15, src_scc +// GFX12: v_cmpx_gt_i16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xb4,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_gt_i16_e64 m0, 0.5 +// GFX12: v_cmpx_gt_i16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xb4,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_gt_i16_e64 exec_lo, -1 +// GFX12: v_cmpx_gt_i16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xb4,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_gt_i16_e64 exec_hi, null +// GFX12: v_cmpx_gt_i16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xb4,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_gt_i16_e64 null, exec_lo +// GFX12: v_cmpx_gt_i16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xb4,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_gt_i16_e64 -1, exec_hi +// GFX12: v_cmpx_gt_i16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xb4,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_gt_i16_e64 0.5, m0 +// GFX12: v_cmpx_gt_i16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xb4,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_gt_i16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_gt_i16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xb4,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_gt_i16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_gt_i16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xb4,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_i32_e64 v1, v2 +// GFX12: v_cmpx_gt_i32_e64 v1, v2 ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_gt_i32_e64 v255, v255 +// GFX12: v_cmpx_gt_i32_e64 v255, v255 ; encoding: [0x7e,0x00,0xc4,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_gt_i32_e64 s1, s2 +// GFX12: v_cmpx_gt_i32_e64 s1, s2 ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_gt_i32_e64 s105, s105 +// GFX12: v_cmpx_gt_i32_e64 s105, s105 ; encoding: [0x7e,0x00,0xc4,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_gt_i32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_gt_i32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xc4,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_gt_i32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_gt_i32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xc4,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_i32_e64 ttmp15, src_scc +// GFX12: v_cmpx_gt_i32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xc4,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_gt_i32_e64 m0, 0.5 +// GFX12: v_cmpx_gt_i32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xc4,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_gt_i32_e64 exec_lo, -1 +// GFX12: v_cmpx_gt_i32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xc4,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_gt_i32_e64 exec_hi, null +// GFX12: v_cmpx_gt_i32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xc4,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_gt_i32_e64 null, exec_lo +// GFX12: v_cmpx_gt_i32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xc4,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_gt_i32_e64 -1, exec_hi +// GFX12: v_cmpx_gt_i32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xc4,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_gt_i32_e64 0.5, m0 +// GFX12: v_cmpx_gt_i32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xc4,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_gt_i32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_gt_i32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xc4,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_gt_i32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_gt_i32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xc4,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_i64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_gt_i64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xd4,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_gt_i64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_gt_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd4,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_gt_i64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_gt_i64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xd4,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_gt_i64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_gt_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd4,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_gt_i64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_gt_i64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xd4,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_gt_i64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_gt_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd4,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_i64_e64 exec, src_scc +// GFX12: v_cmpx_gt_i64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xd4,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_gt_i64_e64 null, 0.5 +// GFX12: v_cmpx_gt_i64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xd4,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_gt_i64_e64 -1, -1 +// GFX12: v_cmpx_gt_i64_e64 -1, -1 ; encoding: [0x7e,0x00,0xd4,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_gt_i64_e64 0.5, null +// GFX12: v_cmpx_gt_i64_e64 0.5, null ; encoding: [0x7e,0x00,0xd4,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_gt_i64_e64 src_scc, exec +// GFX12: v_cmpx_gt_i64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xd4,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_gt_i64_e64 0xaf123456, vcc +// GFX12: v_cmpx_gt_i64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xd4,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_u16_e64 v1, v2 +// GFX12: v_cmpx_gt_u16_e64 v1, v2 ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_gt_u16_e64 v255, v255 +// GFX12: v_cmpx_gt_u16_e64 v255, v255 ; encoding: [0x7e,0x00,0xbc,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_gt_u16_e64 s1, s2 +// GFX12: v_cmpx_gt_u16_e64 s1, s2 ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_gt_u16_e64 s105, s105 +// GFX12: v_cmpx_gt_u16_e64 s105, s105 ; encoding: [0x7e,0x00,0xbc,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_gt_u16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_gt_u16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xbc,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_gt_u16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_gt_u16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xbc,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_u16_e64 ttmp15, src_scc +// GFX12: v_cmpx_gt_u16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xbc,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_gt_u16_e64 m0, 0.5 +// GFX12: v_cmpx_gt_u16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xbc,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_gt_u16_e64 exec_lo, -1 +// GFX12: v_cmpx_gt_u16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xbc,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_gt_u16_e64 exec_hi, null +// GFX12: v_cmpx_gt_u16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xbc,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_gt_u16_e64 null, exec_lo +// GFX12: v_cmpx_gt_u16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xbc,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_gt_u16_e64 -1, exec_hi +// GFX12: v_cmpx_gt_u16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xbc,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_gt_u16_e64 0.5, m0 +// GFX12: v_cmpx_gt_u16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xbc,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_gt_u16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_gt_u16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xbc,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_gt_u16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_gt_u16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xbc,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_u32_e64 v1, v2 +// GFX12: v_cmpx_gt_u32_e64 v1, v2 ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_gt_u32_e64 v255, v255 +// GFX12: v_cmpx_gt_u32_e64 v255, v255 ; encoding: [0x7e,0x00,0xcc,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_gt_u32_e64 s1, s2 +// GFX12: v_cmpx_gt_u32_e64 s1, s2 ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_gt_u32_e64 s105, s105 +// GFX12: v_cmpx_gt_u32_e64 s105, s105 ; encoding: [0x7e,0x00,0xcc,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_gt_u32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_gt_u32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xcc,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_gt_u32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_gt_u32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xcc,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_u32_e64 ttmp15, src_scc +// GFX12: v_cmpx_gt_u32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xcc,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_gt_u32_e64 m0, 0.5 +// GFX12: v_cmpx_gt_u32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xcc,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_gt_u32_e64 exec_lo, -1 +// GFX12: v_cmpx_gt_u32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xcc,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_gt_u32_e64 exec_hi, null +// GFX12: v_cmpx_gt_u32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xcc,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_gt_u32_e64 null, exec_lo +// GFX12: v_cmpx_gt_u32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xcc,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_gt_u32_e64 -1, exec_hi +// GFX12: v_cmpx_gt_u32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xcc,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_gt_u32_e64 0.5, m0 +// GFX12: v_cmpx_gt_u32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xcc,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_gt_u32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_gt_u32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xcc,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_gt_u32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_gt_u32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xcc,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_u64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_gt_u64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xdc,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_gt_u64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_gt_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xdc,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_gt_u64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_gt_u64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xdc,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_gt_u64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_gt_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xdc,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_gt_u64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_gt_u64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xdc,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_gt_u64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_gt_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xdc,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_u64_e64 exec, src_scc +// GFX12: v_cmpx_gt_u64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xdc,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_gt_u64_e64 null, 0.5 +// GFX12: v_cmpx_gt_u64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xdc,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_gt_u64_e64 -1, -1 +// GFX12: v_cmpx_gt_u64_e64 -1, -1 ; encoding: [0x7e,0x00,0xdc,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_gt_u64_e64 0.5, null +// GFX12: v_cmpx_gt_u64_e64 0.5, null ; encoding: [0x7e,0x00,0xdc,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_gt_u64_e64 src_scc, exec +// GFX12: v_cmpx_gt_u64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xdc,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_gt_u64_e64 0xaf123456, vcc +// GFX12: v_cmpx_gt_u64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xdc,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_le_f16_e64 v1, v2 +// GFX12: v_cmpx_le_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_le_f16_e64 v255, v255 +// GFX12: v_cmpx_le_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x83,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_le_f16_e64 s1, s2 +// GFX12: v_cmpx_le_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_le_f16_e64 s105, s105 +// GFX12: v_cmpx_le_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x83,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_le_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_le_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x83,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_le_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_le_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x83,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_le_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x83,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_le_f16_e64 m0, 0.5 +// GFX12: v_cmpx_le_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x83,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_le_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_le_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x83,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_le_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_le_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x83,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_le_f16_e64 null, exec_lo +// GFX12: v_cmpx_le_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x83,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_le_f16_e64 -1, exec_hi +// GFX12: v_cmpx_le_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x83,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_le_f16_e64 0.5, -m0 +// GFX12: v_cmpx_le_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x83,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_le_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_le_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x83,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_le_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_le_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x83,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_f32_e64 v1, v2 +// GFX12: v_cmpx_le_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_le_f32_e64 v255, v255 +// GFX12: v_cmpx_le_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x93,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_le_f32_e64 s1, s2 +// GFX12: v_cmpx_le_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_le_f32_e64 s105, s105 +// GFX12: v_cmpx_le_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x93,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_le_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_le_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x93,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_le_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_le_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x93,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_le_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_le_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x93,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_le_f32_e64 m0, 0.5 +// GFX12: v_cmpx_le_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x93,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_le_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_le_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x93,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_le_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_le_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x93,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_le_f32_e64 null, exec_lo +// GFX12: v_cmpx_le_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x93,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_le_f32_e64 -1, exec_hi +// GFX12: v_cmpx_le_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x93,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_le_f32_e64 0.5, -m0 +// GFX12: v_cmpx_le_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x93,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_le_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_le_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x93,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_le_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_le_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x93,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_le_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_le_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_le_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_le_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa3,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_le_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_le_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xa3,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_le_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_le_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa3,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_le_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_le_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xa3,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_le_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_le_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa3,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_le_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_le_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xa3,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_le_f64_e64 null, 0.5 +// GFX12: v_cmpx_le_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xa3,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_le_f64_e64 -1, -1 +// GFX12: v_cmpx_le_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xa3,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_le_f64_e64 0.5, null +// GFX12: v_cmpx_le_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xa3,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_le_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_le_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xa3,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_le_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_le_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa3,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_le_i16_e64 v1, v2 +// GFX12: v_cmpx_le_i16_e64 v1, v2 ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_le_i16_e64 v255, v255 +// GFX12: v_cmpx_le_i16_e64 v255, v255 ; encoding: [0x7e,0x00,0xb3,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_le_i16_e64 s1, s2 +// GFX12: v_cmpx_le_i16_e64 s1, s2 ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_le_i16_e64 s105, s105 +// GFX12: v_cmpx_le_i16_e64 s105, s105 ; encoding: [0x7e,0x00,0xb3,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_le_i16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_le_i16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xb3,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_le_i16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_le_i16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xb3,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_i16_e64 ttmp15, src_scc +// GFX12: v_cmpx_le_i16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xb3,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_le_i16_e64 m0, 0.5 +// GFX12: v_cmpx_le_i16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xb3,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_le_i16_e64 exec_lo, -1 +// GFX12: v_cmpx_le_i16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xb3,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_le_i16_e64 exec_hi, null +// GFX12: v_cmpx_le_i16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xb3,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_le_i16_e64 null, exec_lo +// GFX12: v_cmpx_le_i16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xb3,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_le_i16_e64 -1, exec_hi +// GFX12: v_cmpx_le_i16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xb3,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_le_i16_e64 0.5, m0 +// GFX12: v_cmpx_le_i16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xb3,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_le_i16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_le_i16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xb3,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_le_i16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_le_i16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xb3,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_i32_e64 v1, v2 +// GFX12: v_cmpx_le_i32_e64 v1, v2 ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_le_i32_e64 v255, v255 +// GFX12: v_cmpx_le_i32_e64 v255, v255 ; encoding: [0x7e,0x00,0xc3,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_le_i32_e64 s1, s2 +// GFX12: v_cmpx_le_i32_e64 s1, s2 ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_le_i32_e64 s105, s105 +// GFX12: v_cmpx_le_i32_e64 s105, s105 ; encoding: [0x7e,0x00,0xc3,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_le_i32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_le_i32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xc3,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_le_i32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_le_i32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xc3,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_le_i32_e64 ttmp15, src_scc +// GFX12: v_cmpx_le_i32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xc3,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_le_i32_e64 m0, 0.5 +// GFX12: v_cmpx_le_i32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xc3,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_le_i32_e64 exec_lo, -1 +// GFX12: v_cmpx_le_i32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xc3,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_le_i32_e64 exec_hi, null +// GFX12: v_cmpx_le_i32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xc3,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_le_i32_e64 null, exec_lo +// GFX12: v_cmpx_le_i32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xc3,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_le_i32_e64 -1, exec_hi +// GFX12: v_cmpx_le_i32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xc3,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_le_i32_e64 0.5, m0 +// GFX12: v_cmpx_le_i32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xc3,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_le_i32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_le_i32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xc3,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_le_i32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_le_i32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xc3,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_le_i64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_le_i64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_le_i64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_le_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd3,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_le_i64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_le_i64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xd3,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_le_i64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_le_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd3,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_le_i64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_le_i64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xd3,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_le_i64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_le_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd3,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_le_i64_e64 exec, src_scc +// GFX12: v_cmpx_le_i64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xd3,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_le_i64_e64 null, 0.5 +// GFX12: v_cmpx_le_i64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xd3,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_le_i64_e64 -1, -1 +// GFX12: v_cmpx_le_i64_e64 -1, -1 ; encoding: [0x7e,0x00,0xd3,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_le_i64_e64 0.5, null +// GFX12: v_cmpx_le_i64_e64 0.5, null ; encoding: [0x7e,0x00,0xd3,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_le_i64_e64 src_scc, exec +// GFX12: v_cmpx_le_i64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xd3,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_le_i64_e64 0xaf123456, vcc +// GFX12: v_cmpx_le_i64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xd3,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_le_u16_e64 v1, v2 +// GFX12: v_cmpx_le_u16_e64 v1, v2 ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_le_u16_e64 v255, v255 +// GFX12: v_cmpx_le_u16_e64 v255, v255 ; encoding: [0x7e,0x00,0xbb,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_le_u16_e64 s1, s2 +// GFX12: v_cmpx_le_u16_e64 s1, s2 ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_le_u16_e64 s105, s105 +// GFX12: v_cmpx_le_u16_e64 s105, s105 ; encoding: [0x7e,0x00,0xbb,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_le_u16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_le_u16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xbb,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_le_u16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_le_u16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xbb,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_u16_e64 ttmp15, src_scc +// GFX12: v_cmpx_le_u16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xbb,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_le_u16_e64 m0, 0.5 +// GFX12: v_cmpx_le_u16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xbb,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_le_u16_e64 exec_lo, -1 +// GFX12: v_cmpx_le_u16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xbb,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_le_u16_e64 exec_hi, null +// GFX12: v_cmpx_le_u16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xbb,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_le_u16_e64 null, exec_lo +// GFX12: v_cmpx_le_u16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xbb,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_le_u16_e64 -1, exec_hi +// GFX12: v_cmpx_le_u16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xbb,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_le_u16_e64 0.5, m0 +// GFX12: v_cmpx_le_u16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xbb,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_le_u16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_le_u16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xbb,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_le_u16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_le_u16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xbb,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_u32_e64 v1, v2 +// GFX12: v_cmpx_le_u32_e64 v1, v2 ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_le_u32_e64 v255, v255 +// GFX12: v_cmpx_le_u32_e64 v255, v255 ; encoding: [0x7e,0x00,0xcb,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_le_u32_e64 s1, s2 +// GFX12: v_cmpx_le_u32_e64 s1, s2 ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_le_u32_e64 s105, s105 +// GFX12: v_cmpx_le_u32_e64 s105, s105 ; encoding: [0x7e,0x00,0xcb,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_le_u32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_le_u32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xcb,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_le_u32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_le_u32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xcb,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_le_u32_e64 ttmp15, src_scc +// GFX12: v_cmpx_le_u32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xcb,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_le_u32_e64 m0, 0.5 +// GFX12: v_cmpx_le_u32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xcb,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_le_u32_e64 exec_lo, -1 +// GFX12: v_cmpx_le_u32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xcb,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_le_u32_e64 exec_hi, null +// GFX12: v_cmpx_le_u32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xcb,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_le_u32_e64 null, exec_lo +// GFX12: v_cmpx_le_u32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xcb,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_le_u32_e64 -1, exec_hi +// GFX12: v_cmpx_le_u32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xcb,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_le_u32_e64 0.5, m0 +// GFX12: v_cmpx_le_u32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xcb,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_le_u32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_le_u32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xcb,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_le_u32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_le_u32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xcb,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_le_u64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_le_u64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_le_u64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_le_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xdb,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_le_u64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_le_u64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xdb,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_le_u64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_le_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xdb,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_le_u64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_le_u64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xdb,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_le_u64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_le_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xdb,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_le_u64_e64 exec, src_scc +// GFX12: v_cmpx_le_u64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xdb,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_le_u64_e64 null, 0.5 +// GFX12: v_cmpx_le_u64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xdb,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_le_u64_e64 -1, -1 +// GFX12: v_cmpx_le_u64_e64 -1, -1 ; encoding: [0x7e,0x00,0xdb,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_le_u64_e64 0.5, null +// GFX12: v_cmpx_le_u64_e64 0.5, null ; encoding: [0x7e,0x00,0xdb,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_le_u64_e64 src_scc, exec +// GFX12: v_cmpx_le_u64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xdb,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_le_u64_e64 0xaf123456, vcc +// GFX12: v_cmpx_le_u64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xdb,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lg_f16_e64 v1, v2 +// GFX12: v_cmpx_lg_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lg_f16_e64 v255, v255 +// GFX12: v_cmpx_lg_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x85,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_lg_f16_e64 s1, s2 +// GFX12: v_cmpx_lg_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_lg_f16_e64 s105, s105 +// GFX12: v_cmpx_lg_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x85,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_lg_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_lg_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x85,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_lg_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_lg_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x85,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_lg_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_lg_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x85,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_lg_f16_e64 m0, 0.5 +// GFX12: v_cmpx_lg_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x85,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_lg_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_lg_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x85,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_lg_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_lg_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x85,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_lg_f16_e64 null, exec_lo +// GFX12: v_cmpx_lg_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x85,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_lg_f16_e64 -1, exec_hi +// GFX12: v_cmpx_lg_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x85,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_lg_f16_e64 0.5, -m0 +// GFX12: v_cmpx_lg_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x85,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_lg_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_lg_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x85,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_lg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_lg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x85,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_lg_f32_e64 v1, v2 +// GFX12: v_cmpx_lg_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lg_f32_e64 v255, v255 +// GFX12: v_cmpx_lg_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x95,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_lg_f32_e64 s1, s2 +// GFX12: v_cmpx_lg_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_lg_f32_e64 s105, s105 +// GFX12: v_cmpx_lg_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x95,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_lg_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_lg_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x95,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_lg_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_lg_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x95,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lg_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_lg_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x95,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_lg_f32_e64 m0, 0.5 +// GFX12: v_cmpx_lg_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x95,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_lg_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_lg_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x95,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_lg_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_lg_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x95,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_lg_f32_e64 null, exec_lo +// GFX12: v_cmpx_lg_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x95,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_lg_f32_e64 -1, exec_hi +// GFX12: v_cmpx_lg_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x95,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_lg_f32_e64 0.5, -m0 +// GFX12: v_cmpx_lg_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x95,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_lg_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_lg_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x95,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_lg_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_lg_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x95,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_lg_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_lg_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lg_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_lg_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa5,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_lg_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_lg_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xa5,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_lg_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_lg_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa5,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_lg_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_lg_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xa5,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_lg_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_lg_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa5,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lg_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_lg_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xa5,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_lg_f64_e64 null, 0.5 +// GFX12: v_cmpx_lg_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xa5,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_lg_f64_e64 -1, -1 +// GFX12: v_cmpx_lg_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xa5,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_lg_f64_e64 0.5, null +// GFX12: v_cmpx_lg_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xa5,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_lg_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_lg_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xa5,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_lg_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_lg_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa5,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_f16_e64 v1, v2 +// GFX12: v_cmpx_lt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_f16_e64 v255, v255 +// GFX12: v_cmpx_lt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_lt_f16_e64 s1, s2 +// GFX12: v_cmpx_lt_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_lt_f16_e64 s105, s105 +// GFX12: v_cmpx_lt_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x81,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_lt_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_lt_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x81,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_lt_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_lt_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x81,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_lt_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x81,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_lt_f16_e64 m0, 0.5 +// GFX12: v_cmpx_lt_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x81,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_lt_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_lt_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x81,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_lt_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_lt_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x81,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_lt_f16_e64 null, exec_lo +// GFX12: v_cmpx_lt_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x81,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_lt_f16_e64 -1, exec_hi +// GFX12: v_cmpx_lt_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x81,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_lt_f16_e64 0.5, -m0 +// GFX12: v_cmpx_lt_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x81,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_lt_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_lt_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x81,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_f32_e64 v1, v2 +// GFX12: v_cmpx_lt_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_f32_e64 v255, v255 +// GFX12: v_cmpx_lt_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x91,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_lt_f32_e64 s1, s2 +// GFX12: v_cmpx_lt_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_lt_f32_e64 s105, s105 +// GFX12: v_cmpx_lt_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x91,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_lt_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_lt_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x91,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_lt_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_lt_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x91,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_lt_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x91,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_lt_f32_e64 m0, 0.5 +// GFX12: v_cmpx_lt_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x91,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_lt_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_lt_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x91,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_lt_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_lt_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x91,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_lt_f32_e64 null, exec_lo +// GFX12: v_cmpx_lt_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x91,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_lt_f32_e64 -1, exec_hi +// GFX12: v_cmpx_lt_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x91,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_lt_f32_e64 0.5, -m0 +// GFX12: v_cmpx_lt_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x91,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_lt_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_lt_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x91,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_lt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_lt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x91,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_lt_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_lt_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa1,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_lt_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_lt_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xa1,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_lt_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_lt_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa1,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_lt_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_lt_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xa1,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_lt_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_lt_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa1,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_lt_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xa1,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_lt_f64_e64 null, 0.5 +// GFX12: v_cmpx_lt_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xa1,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_lt_f64_e64 -1, -1 +// GFX12: v_cmpx_lt_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xa1,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_lt_f64_e64 0.5, null +// GFX12: v_cmpx_lt_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xa1,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_lt_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_lt_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xa1,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_lt_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_lt_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa1,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_i16_e64 v1, v2 +// GFX12: v_cmpx_lt_i16_e64 v1, v2 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_i16_e64 v255, v255 +// GFX12: v_cmpx_lt_i16_e64 v255, v255 ; encoding: [0x7e,0x00,0xb1,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_lt_i16_e64 s1, s2 +// GFX12: v_cmpx_lt_i16_e64 s1, s2 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_lt_i16_e64 s105, s105 +// GFX12: v_cmpx_lt_i16_e64 s105, s105 ; encoding: [0x7e,0x00,0xb1,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_lt_i16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_lt_i16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xb1,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_lt_i16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_lt_i16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xb1,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_i16_e64 ttmp15, src_scc +// GFX12: v_cmpx_lt_i16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xb1,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_lt_i16_e64 m0, 0.5 +// GFX12: v_cmpx_lt_i16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xb1,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_lt_i16_e64 exec_lo, -1 +// GFX12: v_cmpx_lt_i16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xb1,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_lt_i16_e64 exec_hi, null +// GFX12: v_cmpx_lt_i16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xb1,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_lt_i16_e64 null, exec_lo +// GFX12: v_cmpx_lt_i16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xb1,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_lt_i16_e64 -1, exec_hi +// GFX12: v_cmpx_lt_i16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xb1,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_lt_i16_e64 0.5, m0 +// GFX12: v_cmpx_lt_i16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xb1,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_lt_i16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_lt_i16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xb1,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_lt_i16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_lt_i16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xb1,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_i32_e64 v1, v2 +// GFX12: v_cmpx_lt_i32_e64 v1, v2 ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_i32_e64 v255, v255 +// GFX12: v_cmpx_lt_i32_e64 v255, v255 ; encoding: [0x7e,0x00,0xc1,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_lt_i32_e64 s1, s2 +// GFX12: v_cmpx_lt_i32_e64 s1, s2 ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_lt_i32_e64 s105, s105 +// GFX12: v_cmpx_lt_i32_e64 s105, s105 ; encoding: [0x7e,0x00,0xc1,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_lt_i32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_lt_i32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xc1,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_lt_i32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_lt_i32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xc1,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_i32_e64 ttmp15, src_scc +// GFX12: v_cmpx_lt_i32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xc1,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_lt_i32_e64 m0, 0.5 +// GFX12: v_cmpx_lt_i32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xc1,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_lt_i32_e64 exec_lo, -1 +// GFX12: v_cmpx_lt_i32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xc1,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_lt_i32_e64 exec_hi, null +// GFX12: v_cmpx_lt_i32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xc1,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_lt_i32_e64 null, exec_lo +// GFX12: v_cmpx_lt_i32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xc1,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_lt_i32_e64 -1, exec_hi +// GFX12: v_cmpx_lt_i32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xc1,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_lt_i32_e64 0.5, m0 +// GFX12: v_cmpx_lt_i32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xc1,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_lt_i32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_lt_i32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xc1,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_lt_i32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_lt_i32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xc1,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_i64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_lt_i64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_i64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_lt_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd1,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_lt_i64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_lt_i64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xd1,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_lt_i64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_lt_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd1,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_lt_i64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_lt_i64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xd1,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_lt_i64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_lt_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd1,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_i64_e64 exec, src_scc +// GFX12: v_cmpx_lt_i64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xd1,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_lt_i64_e64 null, 0.5 +// GFX12: v_cmpx_lt_i64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xd1,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_lt_i64_e64 -1, -1 +// GFX12: v_cmpx_lt_i64_e64 -1, -1 ; encoding: [0x7e,0x00,0xd1,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_lt_i64_e64 0.5, null +// GFX12: v_cmpx_lt_i64_e64 0.5, null ; encoding: [0x7e,0x00,0xd1,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_lt_i64_e64 src_scc, exec +// GFX12: v_cmpx_lt_i64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xd1,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_lt_i64_e64 0xaf123456, vcc +// GFX12: v_cmpx_lt_i64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xd1,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_u16_e64 v1, v2 +// GFX12: v_cmpx_lt_u16_e64 v1, v2 ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_u16_e64 v255, v255 +// GFX12: v_cmpx_lt_u16_e64 v255, v255 ; encoding: [0x7e,0x00,0xb9,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_lt_u16_e64 s1, s2 +// GFX12: v_cmpx_lt_u16_e64 s1, s2 ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_lt_u16_e64 s105, s105 +// GFX12: v_cmpx_lt_u16_e64 s105, s105 ; encoding: [0x7e,0x00,0xb9,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_lt_u16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_lt_u16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xb9,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_lt_u16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_lt_u16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xb9,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_u16_e64 ttmp15, src_scc +// GFX12: v_cmpx_lt_u16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xb9,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_lt_u16_e64 m0, 0.5 +// GFX12: v_cmpx_lt_u16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xb9,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_lt_u16_e64 exec_lo, -1 +// GFX12: v_cmpx_lt_u16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xb9,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_lt_u16_e64 exec_hi, null +// GFX12: v_cmpx_lt_u16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xb9,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_lt_u16_e64 null, exec_lo +// GFX12: v_cmpx_lt_u16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xb9,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_lt_u16_e64 -1, exec_hi +// GFX12: v_cmpx_lt_u16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xb9,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_lt_u16_e64 0.5, m0 +// GFX12: v_cmpx_lt_u16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xb9,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_lt_u16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_lt_u16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xb9,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_lt_u16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_lt_u16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xb9,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_u32_e64 v1, v2 +// GFX12: v_cmpx_lt_u32_e64 v1, v2 ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_u32_e64 v255, v255 +// GFX12: v_cmpx_lt_u32_e64 v255, v255 ; encoding: [0x7e,0x00,0xc9,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_lt_u32_e64 s1, s2 +// GFX12: v_cmpx_lt_u32_e64 s1, s2 ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_lt_u32_e64 s105, s105 +// GFX12: v_cmpx_lt_u32_e64 s105, s105 ; encoding: [0x7e,0x00,0xc9,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_lt_u32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_lt_u32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xc9,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_lt_u32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_lt_u32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xc9,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_u32_e64 ttmp15, src_scc +// GFX12: v_cmpx_lt_u32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xc9,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_lt_u32_e64 m0, 0.5 +// GFX12: v_cmpx_lt_u32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xc9,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_lt_u32_e64 exec_lo, -1 +// GFX12: v_cmpx_lt_u32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xc9,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_lt_u32_e64 exec_hi, null +// GFX12: v_cmpx_lt_u32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xc9,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_lt_u32_e64 null, exec_lo +// GFX12: v_cmpx_lt_u32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xc9,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_lt_u32_e64 -1, exec_hi +// GFX12: v_cmpx_lt_u32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xc9,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_lt_u32_e64 0.5, m0 +// GFX12: v_cmpx_lt_u32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xc9,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_lt_u32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_lt_u32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xc9,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_lt_u32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_lt_u32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xc9,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_u64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_lt_u64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_u64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_lt_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd9,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_lt_u64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_lt_u64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xd9,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_lt_u64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_lt_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd9,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_lt_u64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_lt_u64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xd9,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_lt_u64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_lt_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd9,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_u64_e64 exec, src_scc +// GFX12: v_cmpx_lt_u64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xd9,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_lt_u64_e64 null, 0.5 +// GFX12: v_cmpx_lt_u64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xd9,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_lt_u64_e64 -1, -1 +// GFX12: v_cmpx_lt_u64_e64 -1, -1 ; encoding: [0x7e,0x00,0xd9,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_lt_u64_e64 0.5, null +// GFX12: v_cmpx_lt_u64_e64 0.5, null ; encoding: [0x7e,0x00,0xd9,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_lt_u64_e64 src_scc, exec +// GFX12: v_cmpx_lt_u64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xd9,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_lt_u64_e64 0xaf123456, vcc +// GFX12: v_cmpx_lt_u64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xd9,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_i16_e64 v1, v2 +// GFX12: v_cmpx_ne_i16_e64 v1, v2 ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ne_i16_e64 v255, v255 +// GFX12: v_cmpx_ne_i16_e64 v255, v255 ; encoding: [0x7e,0x00,0xb5,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ne_i16_e64 s1, s2 +// GFX12: v_cmpx_ne_i16_e64 s1, s2 ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ne_i16_e64 s105, s105 +// GFX12: v_cmpx_ne_i16_e64 s105, s105 ; encoding: [0x7e,0x00,0xb5,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ne_i16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ne_i16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xb5,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ne_i16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_ne_i16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xb5,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_ne_i16_e64 ttmp15, src_scc +// GFX12: v_cmpx_ne_i16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xb5,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ne_i16_e64 m0, 0.5 +// GFX12: v_cmpx_ne_i16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xb5,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ne_i16_e64 exec_lo, -1 +// GFX12: v_cmpx_ne_i16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xb5,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ne_i16_e64 exec_hi, null +// GFX12: v_cmpx_ne_i16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xb5,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ne_i16_e64 null, exec_lo +// GFX12: v_cmpx_ne_i16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xb5,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ne_i16_e64 -1, exec_hi +// GFX12: v_cmpx_ne_i16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xb5,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ne_i16_e64 0.5, m0 +// GFX12: v_cmpx_ne_i16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xb5,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_ne_i16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_ne_i16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xb5,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_ne_i16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_ne_i16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xb5,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_ne_i32_e64 v1, v2 +// GFX12: v_cmpx_ne_i32_e64 v1, v2 ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ne_i32_e64 v255, v255 +// GFX12: v_cmpx_ne_i32_e64 v255, v255 ; encoding: [0x7e,0x00,0xc5,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ne_i32_e64 s1, s2 +// GFX12: v_cmpx_ne_i32_e64 s1, s2 ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ne_i32_e64 s105, s105 +// GFX12: v_cmpx_ne_i32_e64 s105, s105 ; encoding: [0x7e,0x00,0xc5,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ne_i32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ne_i32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xc5,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ne_i32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_ne_i32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xc5,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_i32_e64 ttmp15, src_scc +// GFX12: v_cmpx_ne_i32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xc5,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ne_i32_e64 m0, 0.5 +// GFX12: v_cmpx_ne_i32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xc5,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ne_i32_e64 exec_lo, -1 +// GFX12: v_cmpx_ne_i32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xc5,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ne_i32_e64 exec_hi, null +// GFX12: v_cmpx_ne_i32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xc5,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ne_i32_e64 null, exec_lo +// GFX12: v_cmpx_ne_i32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xc5,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ne_i32_e64 -1, exec_hi +// GFX12: v_cmpx_ne_i32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xc5,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ne_i32_e64 0.5, m0 +// GFX12: v_cmpx_ne_i32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xc5,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_ne_i32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_ne_i32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xc5,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_ne_i32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_ne_i32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xc5,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_i64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_ne_i64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ne_i64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_ne_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd5,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_ne_i64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_ne_i64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xd5,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_ne_i64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_ne_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd5,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_ne_i64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_ne_i64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xd5,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_ne_i64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_ne_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd5,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_i64_e64 exec, src_scc +// GFX12: v_cmpx_ne_i64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xd5,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_ne_i64_e64 null, 0.5 +// GFX12: v_cmpx_ne_i64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xd5,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_ne_i64_e64 -1, -1 +// GFX12: v_cmpx_ne_i64_e64 -1, -1 ; encoding: [0x7e,0x00,0xd5,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_ne_i64_e64 0.5, null +// GFX12: v_cmpx_ne_i64_e64 0.5, null ; encoding: [0x7e,0x00,0xd5,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_ne_i64_e64 src_scc, exec +// GFX12: v_cmpx_ne_i64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xd5,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_ne_i64_e64 0xaf123456, vcc +// GFX12: v_cmpx_ne_i64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xd5,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_u16_e64 v1, v2 +// GFX12: v_cmpx_ne_u16_e64 v1, v2 ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ne_u16_e64 v255, v255 +// GFX12: v_cmpx_ne_u16_e64 v255, v255 ; encoding: [0x7e,0x00,0xbd,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ne_u16_e64 s1, s2 +// GFX12: v_cmpx_ne_u16_e64 s1, s2 ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ne_u16_e64 s105, s105 +// GFX12: v_cmpx_ne_u16_e64 s105, s105 ; encoding: [0x7e,0x00,0xbd,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ne_u16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ne_u16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xbd,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ne_u16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_ne_u16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0xbd,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_ne_u16_e64 ttmp15, src_scc +// GFX12: v_cmpx_ne_u16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xbd,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ne_u16_e64 m0, 0.5 +// GFX12: v_cmpx_ne_u16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xbd,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ne_u16_e64 exec_lo, -1 +// GFX12: v_cmpx_ne_u16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xbd,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ne_u16_e64 exec_hi, null +// GFX12: v_cmpx_ne_u16_e64 exec_hi, null ; encoding: [0x7e,0x00,0xbd,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ne_u16_e64 null, exec_lo +// GFX12: v_cmpx_ne_u16_e64 null, exec_lo ; encoding: [0x7e,0x00,0xbd,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ne_u16_e64 -1, exec_hi +// GFX12: v_cmpx_ne_u16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xbd,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ne_u16_e64 0.5, m0 +// GFX12: v_cmpx_ne_u16_e64 0.5, m0 ; encoding: [0x7e,0x00,0xbd,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_ne_u16_e64 src_scc, vcc_lo +// GFX12: v_cmpx_ne_u16_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xbd,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_ne_u16_e64 0xfe0b, vcc_hi +// GFX12: v_cmpx_ne_u16_e64 0xfe0b, vcc_hi ; encoding: [0x7e,0x00,0xbd,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_ne_u32_e64 v1, v2 +// GFX12: v_cmpx_ne_u32_e64 v1, v2 ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ne_u32_e64 v255, v255 +// GFX12: v_cmpx_ne_u32_e64 v255, v255 ; encoding: [0x7e,0x00,0xcd,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ne_u32_e64 s1, s2 +// GFX12: v_cmpx_ne_u32_e64 s1, s2 ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ne_u32_e64 s105, s105 +// GFX12: v_cmpx_ne_u32_e64 s105, s105 ; encoding: [0x7e,0x00,0xcd,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ne_u32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ne_u32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0xcd,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ne_u32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_ne_u32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xcd,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_u32_e64 ttmp15, src_scc +// GFX12: v_cmpx_ne_u32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0xcd,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ne_u32_e64 m0, 0.5 +// GFX12: v_cmpx_ne_u32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0xcd,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ne_u32_e64 exec_lo, -1 +// GFX12: v_cmpx_ne_u32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0xcd,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ne_u32_e64 exec_hi, null +// GFX12: v_cmpx_ne_u32_e64 exec_hi, null ; encoding: [0x7e,0x00,0xcd,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ne_u32_e64 null, exec_lo +// GFX12: v_cmpx_ne_u32_e64 null, exec_lo ; encoding: [0x7e,0x00,0xcd,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ne_u32_e64 -1, exec_hi +// GFX12: v_cmpx_ne_u32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0xcd,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ne_u32_e64 0.5, m0 +// GFX12: v_cmpx_ne_u32_e64 0.5, m0 ; encoding: [0x7e,0x00,0xcd,0xd4,0xf0,0xfa,0x00,0x00] + +v_cmpx_ne_u32_e64 src_scc, vcc_lo +// GFX12: v_cmpx_ne_u32_e64 src_scc, vcc_lo ; encoding: [0x7e,0x00,0xcd,0xd4,0xfd,0xd4,0x00,0x00] + +v_cmpx_ne_u32_e64 0xaf123456, vcc_hi +// GFX12: v_cmpx_ne_u32_e64 0xaf123456, vcc_hi ; encoding: [0x7e,0x00,0xcd,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_u64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_ne_u64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ne_u64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_ne_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xdd,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_ne_u64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_ne_u64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xdd,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_ne_u64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_ne_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xdd,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_ne_u64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_ne_u64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xdd,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_ne_u64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_ne_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xdd,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_u64_e64 exec, src_scc +// GFX12: v_cmpx_ne_u64_e64 exec, src_scc ; encoding: [0x7e,0x00,0xdd,0xd4,0x7e,0xfa,0x01,0x00] + +v_cmpx_ne_u64_e64 null, 0.5 +// GFX12: v_cmpx_ne_u64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xdd,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_ne_u64_e64 -1, -1 +// GFX12: v_cmpx_ne_u64_e64 -1, -1 ; encoding: [0x7e,0x00,0xdd,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_ne_u64_e64 0.5, null +// GFX12: v_cmpx_ne_u64_e64 0.5, null ; encoding: [0x7e,0x00,0xdd,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_ne_u64_e64 src_scc, exec +// GFX12: v_cmpx_ne_u64_e64 src_scc, exec ; encoding: [0x7e,0x00,0xdd,0xd4,0xfd,0xfc,0x00,0x00] + +v_cmpx_ne_u64_e64 0xaf123456, vcc +// GFX12: v_cmpx_ne_u64_e64 0xaf123456, vcc ; encoding: [0x7e,0x00,0xdd,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_neq_f16_e64 v1, v2 +// GFX12: v_cmpx_neq_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_neq_f16_e64 v255, v255 +// GFX12: v_cmpx_neq_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x8d,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_neq_f16_e64 s1, s2 +// GFX12: v_cmpx_neq_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_neq_f16_e64 s105, s105 +// GFX12: v_cmpx_neq_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x8d,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_neq_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_neq_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x8d,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_neq_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_neq_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x8d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_neq_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_neq_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x8d,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_neq_f16_e64 m0, 0.5 +// GFX12: v_cmpx_neq_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x8d,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_neq_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_neq_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x8d,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_neq_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_neq_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x8d,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_neq_f16_e64 null, exec_lo +// GFX12: v_cmpx_neq_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x8d,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_neq_f16_e64 -1, exec_hi +// GFX12: v_cmpx_neq_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x8d,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_neq_f16_e64 0.5, -m0 +// GFX12: v_cmpx_neq_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x8d,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_neq_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_neq_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x8d,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_neq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_neq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_neq_f32_e64 v1, v2 +// GFX12: v_cmpx_neq_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_neq_f32_e64 v255, v255 +// GFX12: v_cmpx_neq_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x9d,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_neq_f32_e64 s1, s2 +// GFX12: v_cmpx_neq_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_neq_f32_e64 s105, s105 +// GFX12: v_cmpx_neq_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x9d,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_neq_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_neq_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x9d,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_neq_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_neq_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x9d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_neq_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_neq_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x9d,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_neq_f32_e64 m0, 0.5 +// GFX12: v_cmpx_neq_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x9d,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_neq_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_neq_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x9d,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_neq_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_neq_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x9d,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_neq_f32_e64 null, exec_lo +// GFX12: v_cmpx_neq_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x9d,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_neq_f32_e64 -1, exec_hi +// GFX12: v_cmpx_neq_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x9d,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_neq_f32_e64 0.5, -m0 +// GFX12: v_cmpx_neq_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x9d,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_neq_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_neq_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x9d,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_neq_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_neq_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x9d,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_neq_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_neq_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_neq_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_neq_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xad,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_neq_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_neq_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xad,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_neq_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_neq_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xad,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_neq_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_neq_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xad,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_neq_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_neq_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xad,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_neq_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_neq_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xad,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_neq_f64_e64 null, 0.5 +// GFX12: v_cmpx_neq_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xad,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_neq_f64_e64 -1, -1 +// GFX12: v_cmpx_neq_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xad,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_neq_f64_e64 0.5, null +// GFX12: v_cmpx_neq_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xad,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_neq_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_neq_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xad,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_neq_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_neq_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xad,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_nge_f16_e64 v1, v2 +// GFX12: v_cmpx_nge_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nge_f16_e64 v255, v255 +// GFX12: v_cmpx_nge_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x89,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_nge_f16_e64 s1, s2 +// GFX12: v_cmpx_nge_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_nge_f16_e64 s105, s105 +// GFX12: v_cmpx_nge_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x89,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_nge_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_nge_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x89,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_nge_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_nge_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x89,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_nge_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_nge_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x89,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_nge_f16_e64 m0, 0.5 +// GFX12: v_cmpx_nge_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x89,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_nge_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_nge_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x89,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_nge_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_nge_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x89,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_nge_f16_e64 null, exec_lo +// GFX12: v_cmpx_nge_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x89,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_nge_f16_e64 -1, exec_hi +// GFX12: v_cmpx_nge_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x89,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_nge_f16_e64 0.5, -m0 +// GFX12: v_cmpx_nge_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x89,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_nge_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_nge_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x89,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_nge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_nge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x89,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_nge_f32_e64 v1, v2 +// GFX12: v_cmpx_nge_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nge_f32_e64 v255, v255 +// GFX12: v_cmpx_nge_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x99,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_nge_f32_e64 s1, s2 +// GFX12: v_cmpx_nge_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_nge_f32_e64 s105, s105 +// GFX12: v_cmpx_nge_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x99,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_nge_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_nge_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x99,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_nge_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_nge_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x99,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_nge_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_nge_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x99,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_nge_f32_e64 m0, 0.5 +// GFX12: v_cmpx_nge_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x99,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_nge_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_nge_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x99,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_nge_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_nge_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x99,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_nge_f32_e64 null, exec_lo +// GFX12: v_cmpx_nge_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x99,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_nge_f32_e64 -1, exec_hi +// GFX12: v_cmpx_nge_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x99,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_nge_f32_e64 0.5, -m0 +// GFX12: v_cmpx_nge_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x99,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_nge_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_nge_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x99,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_nge_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_nge_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x99,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_nge_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_nge_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nge_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_nge_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa9,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_nge_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_nge_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xa9,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_nge_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_nge_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa9,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_nge_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_nge_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xa9,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_nge_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_nge_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa9,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_nge_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_nge_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xa9,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_nge_f64_e64 null, 0.5 +// GFX12: v_cmpx_nge_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xa9,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_nge_f64_e64 -1, -1 +// GFX12: v_cmpx_nge_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xa9,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_nge_f64_e64 0.5, null +// GFX12: v_cmpx_nge_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xa9,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_nge_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_nge_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xa9,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_nge_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_nge_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa9,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_ngt_f16_e64 v1, v2 +// GFX12: v_cmpx_ngt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ngt_f16_e64 v255, v255 +// GFX12: v_cmpx_ngt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x8b,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ngt_f16_e64 s1, s2 +// GFX12: v_cmpx_ngt_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ngt_f16_e64 s105, s105 +// GFX12: v_cmpx_ngt_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x8b,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ngt_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ngt_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x8b,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ngt_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_ngt_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x8b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_ngt_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_ngt_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x8b,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ngt_f16_e64 m0, 0.5 +// GFX12: v_cmpx_ngt_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x8b,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ngt_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_ngt_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x8b,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ngt_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_ngt_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x8b,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ngt_f16_e64 null, exec_lo +// GFX12: v_cmpx_ngt_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x8b,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ngt_f16_e64 -1, exec_hi +// GFX12: v_cmpx_ngt_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x8b,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ngt_f16_e64 0.5, -m0 +// GFX12: v_cmpx_ngt_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x8b,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_ngt_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_ngt_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x8b,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_ngt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_ngt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_ngt_f32_e64 v1, v2 +// GFX12: v_cmpx_ngt_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ngt_f32_e64 v255, v255 +// GFX12: v_cmpx_ngt_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x9b,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_ngt_f32_e64 s1, s2 +// GFX12: v_cmpx_ngt_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_ngt_f32_e64 s105, s105 +// GFX12: v_cmpx_ngt_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x9b,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_ngt_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_ngt_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x9b,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_ngt_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_ngt_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x9b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ngt_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_ngt_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x9b,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_ngt_f32_e64 m0, 0.5 +// GFX12: v_cmpx_ngt_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x9b,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_ngt_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_ngt_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x9b,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_ngt_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_ngt_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x9b,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_ngt_f32_e64 null, exec_lo +// GFX12: v_cmpx_ngt_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x9b,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_ngt_f32_e64 -1, exec_hi +// GFX12: v_cmpx_ngt_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x9b,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_ngt_f32_e64 0.5, -m0 +// GFX12: v_cmpx_ngt_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x9b,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_ngt_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_ngt_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x9b,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_ngt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_ngt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x9b,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_ngt_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_ngt_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_ngt_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_ngt_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xab,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_ngt_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_ngt_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xab,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_ngt_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_ngt_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xab,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_ngt_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_ngt_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xab,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_ngt_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_ngt_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xab,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_ngt_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_ngt_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xab,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_ngt_f64_e64 null, 0.5 +// GFX12: v_cmpx_ngt_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xab,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_ngt_f64_e64 -1, -1 +// GFX12: v_cmpx_ngt_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xab,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_ngt_f64_e64 0.5, null +// GFX12: v_cmpx_ngt_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xab,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_ngt_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_ngt_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xab,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_ngt_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_ngt_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xab,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_nle_f16_e64 v1, v2 +// GFX12: v_cmpx_nle_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nle_f16_e64 v255, v255 +// GFX12: v_cmpx_nle_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x8c,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_nle_f16_e64 s1, s2 +// GFX12: v_cmpx_nle_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_nle_f16_e64 s105, s105 +// GFX12: v_cmpx_nle_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x8c,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_nle_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_nle_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x8c,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_nle_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_nle_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x8c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_nle_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_nle_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x8c,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_nle_f16_e64 m0, 0.5 +// GFX12: v_cmpx_nle_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x8c,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_nle_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_nle_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x8c,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_nle_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_nle_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x8c,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_nle_f16_e64 null, exec_lo +// GFX12: v_cmpx_nle_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x8c,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_nle_f16_e64 -1, exec_hi +// GFX12: v_cmpx_nle_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x8c,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_nle_f16_e64 0.5, -m0 +// GFX12: v_cmpx_nle_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x8c,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_nle_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_nle_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x8c,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_nle_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_nle_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_nle_f32_e64 v1, v2 +// GFX12: v_cmpx_nle_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nle_f32_e64 v255, v255 +// GFX12: v_cmpx_nle_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x9c,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_nle_f32_e64 s1, s2 +// GFX12: v_cmpx_nle_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_nle_f32_e64 s105, s105 +// GFX12: v_cmpx_nle_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x9c,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_nle_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_nle_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x9c,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_nle_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_nle_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x9c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_nle_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_nle_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x9c,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_nle_f32_e64 m0, 0.5 +// GFX12: v_cmpx_nle_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x9c,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_nle_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_nle_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x9c,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_nle_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_nle_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x9c,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_nle_f32_e64 null, exec_lo +// GFX12: v_cmpx_nle_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x9c,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_nle_f32_e64 -1, exec_hi +// GFX12: v_cmpx_nle_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x9c,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_nle_f32_e64 0.5, -m0 +// GFX12: v_cmpx_nle_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x9c,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_nle_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_nle_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x9c,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_nle_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_nle_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x9c,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_nle_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_nle_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nle_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_nle_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xac,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_nle_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_nle_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xac,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_nle_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_nle_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xac,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_nle_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_nle_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xac,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_nle_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_nle_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xac,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_nle_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_nle_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xac,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_nle_f64_e64 null, 0.5 +// GFX12: v_cmpx_nle_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xac,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_nle_f64_e64 -1, -1 +// GFX12: v_cmpx_nle_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xac,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_nle_f64_e64 0.5, null +// GFX12: v_cmpx_nle_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xac,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_nle_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_nle_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xac,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_nle_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_nle_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xac,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_nlg_f16_e64 v1, v2 +// GFX12: v_cmpx_nlg_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nlg_f16_e64 v255, v255 +// GFX12: v_cmpx_nlg_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x8a,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_nlg_f16_e64 s1, s2 +// GFX12: v_cmpx_nlg_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_nlg_f16_e64 s105, s105 +// GFX12: v_cmpx_nlg_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x8a,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_nlg_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_nlg_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x8a,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_nlg_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_nlg_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x8a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_nlg_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_nlg_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x8a,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_nlg_f16_e64 m0, 0.5 +// GFX12: v_cmpx_nlg_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x8a,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_nlg_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_nlg_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x8a,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_nlg_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_nlg_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x8a,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_nlg_f16_e64 null, exec_lo +// GFX12: v_cmpx_nlg_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x8a,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_nlg_f16_e64 -1, exec_hi +// GFX12: v_cmpx_nlg_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x8a,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_nlg_f16_e64 0.5, -m0 +// GFX12: v_cmpx_nlg_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x8a,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_nlg_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_nlg_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x8a,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_nlg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_nlg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_nlg_f32_e64 v1, v2 +// GFX12: v_cmpx_nlg_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nlg_f32_e64 v255, v255 +// GFX12: v_cmpx_nlg_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x9a,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_nlg_f32_e64 s1, s2 +// GFX12: v_cmpx_nlg_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_nlg_f32_e64 s105, s105 +// GFX12: v_cmpx_nlg_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x9a,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_nlg_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_nlg_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x9a,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_nlg_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_nlg_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x9a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_nlg_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_nlg_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x9a,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_nlg_f32_e64 m0, 0.5 +// GFX12: v_cmpx_nlg_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x9a,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_nlg_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_nlg_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x9a,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_nlg_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_nlg_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x9a,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_nlg_f32_e64 null, exec_lo +// GFX12: v_cmpx_nlg_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x9a,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_nlg_f32_e64 -1, exec_hi +// GFX12: v_cmpx_nlg_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x9a,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_nlg_f32_e64 0.5, -m0 +// GFX12: v_cmpx_nlg_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x9a,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_nlg_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_nlg_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x9a,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_nlg_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_nlg_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x9a,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_nlg_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_nlg_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nlg_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_nlg_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xaa,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_nlg_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_nlg_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xaa,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_nlg_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_nlg_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xaa,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_nlg_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_nlg_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xaa,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_nlg_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_nlg_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xaa,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_nlg_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_nlg_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xaa,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_nlg_f64_e64 null, 0.5 +// GFX12: v_cmpx_nlg_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xaa,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_nlg_f64_e64 -1, -1 +// GFX12: v_cmpx_nlg_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xaa,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_nlg_f64_e64 0.5, null +// GFX12: v_cmpx_nlg_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xaa,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_nlg_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_nlg_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xaa,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_nlg_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_nlg_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xaa,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_nlt_f16_e64 v1, v2 +// GFX12: v_cmpx_nlt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nlt_f16_e64 v255, v255 +// GFX12: v_cmpx_nlt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x8e,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_nlt_f16_e64 s1, s2 +// GFX12: v_cmpx_nlt_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_nlt_f16_e64 s105, s105 +// GFX12: v_cmpx_nlt_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x8e,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_nlt_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_nlt_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x8e,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_nlt_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_nlt_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x8e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_nlt_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_nlt_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x8e,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_nlt_f16_e64 m0, 0.5 +// GFX12: v_cmpx_nlt_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x8e,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_nlt_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_nlt_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x8e,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_nlt_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_nlt_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x8e,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_nlt_f16_e64 null, exec_lo +// GFX12: v_cmpx_nlt_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x8e,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_nlt_f16_e64 -1, exec_hi +// GFX12: v_cmpx_nlt_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x8e,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_nlt_f16_e64 0.5, -m0 +// GFX12: v_cmpx_nlt_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x8e,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_nlt_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_nlt_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x8e,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_nlt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_nlt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_nlt_f32_e64 v1, v2 +// GFX12: v_cmpx_nlt_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nlt_f32_e64 v255, v255 +// GFX12: v_cmpx_nlt_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x9e,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_nlt_f32_e64 s1, s2 +// GFX12: v_cmpx_nlt_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_nlt_f32_e64 s105, s105 +// GFX12: v_cmpx_nlt_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x9e,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_nlt_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_nlt_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x9e,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_nlt_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_nlt_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x9e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_nlt_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_nlt_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x9e,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_nlt_f32_e64 m0, 0.5 +// GFX12: v_cmpx_nlt_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x9e,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_nlt_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_nlt_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x9e,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_nlt_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_nlt_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x9e,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_nlt_f32_e64 null, exec_lo +// GFX12: v_cmpx_nlt_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x9e,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_nlt_f32_e64 -1, exec_hi +// GFX12: v_cmpx_nlt_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x9e,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_nlt_f32_e64 0.5, -m0 +// GFX12: v_cmpx_nlt_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x9e,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_nlt_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_nlt_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x9e,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_nlt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_nlt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x9e,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_nlt_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_nlt_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_nlt_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_nlt_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xae,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_nlt_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_nlt_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xae,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_nlt_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_nlt_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xae,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_nlt_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_nlt_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xae,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_nlt_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_nlt_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xae,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_nlt_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_nlt_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xae,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_nlt_f64_e64 null, 0.5 +// GFX12: v_cmpx_nlt_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xae,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_nlt_f64_e64 -1, -1 +// GFX12: v_cmpx_nlt_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xae,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_nlt_f64_e64 0.5, null +// GFX12: v_cmpx_nlt_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xae,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_nlt_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_nlt_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xae,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_nlt_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_nlt_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xae,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_o_f16_e64 v1, v2 +// GFX12: v_cmpx_o_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_o_f16_e64 v255, v255 +// GFX12: v_cmpx_o_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x87,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_o_f16_e64 s1, s2 +// GFX12: v_cmpx_o_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_o_f16_e64 s105, s105 +// GFX12: v_cmpx_o_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x87,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_o_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_o_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x87,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_o_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_o_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x87,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_o_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_o_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x87,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_o_f16_e64 m0, 0.5 +// GFX12: v_cmpx_o_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x87,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_o_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_o_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x87,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_o_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_o_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x87,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_o_f16_e64 null, exec_lo +// GFX12: v_cmpx_o_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x87,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_o_f16_e64 -1, exec_hi +// GFX12: v_cmpx_o_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x87,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_o_f16_e64 0.5, -m0 +// GFX12: v_cmpx_o_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x87,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_o_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_o_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x87,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_o_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_o_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x87,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_o_f32_e64 v1, v2 +// GFX12: v_cmpx_o_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_o_f32_e64 v255, v255 +// GFX12: v_cmpx_o_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x97,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_o_f32_e64 s1, s2 +// GFX12: v_cmpx_o_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_o_f32_e64 s105, s105 +// GFX12: v_cmpx_o_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x97,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_o_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_o_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x97,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_o_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_o_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x97,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_o_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_o_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x97,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_o_f32_e64 m0, 0.5 +// GFX12: v_cmpx_o_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x97,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_o_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_o_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x97,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_o_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_o_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x97,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_o_f32_e64 null, exec_lo +// GFX12: v_cmpx_o_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x97,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_o_f32_e64 -1, exec_hi +// GFX12: v_cmpx_o_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x97,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_o_f32_e64 0.5, -m0 +// GFX12: v_cmpx_o_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x97,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_o_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_o_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x97,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_o_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_o_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x97,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_o_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_o_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_o_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_o_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa7,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_o_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_o_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xa7,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_o_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_o_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa7,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_o_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_o_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xa7,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_o_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_o_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa7,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_o_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_o_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xa7,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_o_f64_e64 null, 0.5 +// GFX12: v_cmpx_o_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xa7,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_o_f64_e64 -1, -1 +// GFX12: v_cmpx_o_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xa7,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_o_f64_e64 0.5, null +// GFX12: v_cmpx_o_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xa7,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_o_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_o_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xa7,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_o_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_o_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa7,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmpx_u_f16_e64 v1, v2 +// GFX12: v_cmpx_u_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_u_f16_e64 v255, v255 +// GFX12: v_cmpx_u_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x88,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_u_f16_e64 s1, s2 +// GFX12: v_cmpx_u_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_u_f16_e64 s105, s105 +// GFX12: v_cmpx_u_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x88,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_u_f16_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_u_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x88,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_u_f16_e64 vcc_hi, 0xfe0b +// GFX12: v_cmpx_u_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x88,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_cmpx_u_f16_e64 ttmp15, src_scc +// GFX12: v_cmpx_u_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x88,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_u_f16_e64 m0, 0.5 +// GFX12: v_cmpx_u_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x88,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_u_f16_e64 exec_lo, -1 +// GFX12: v_cmpx_u_f16_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x88,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_u_f16_e64 |exec_hi|, null +// GFX12: v_cmpx_u_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x88,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_u_f16_e64 null, exec_lo +// GFX12: v_cmpx_u_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x88,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_u_f16_e64 -1, exec_hi +// GFX12: v_cmpx_u_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x88,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_u_f16_e64 0.5, -m0 +// GFX12: v_cmpx_u_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x88,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_u_f16_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_u_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x88,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_u_f16_e64 -|0xfe0b|, -|vcc_hi| clamp +// GFX12: v_cmpx_u_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x88,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmpx_u_f32_e64 v1, v2 +// GFX12: v_cmpx_u_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_u_f32_e64 v255, v255 +// GFX12: v_cmpx_u_f32_e64 v255, v255 ; encoding: [0x7e,0x00,0x98,0xd4,0xff,0xff,0x03,0x00] + +v_cmpx_u_f32_e64 s1, s2 +// GFX12: v_cmpx_u_f32_e64 s1, s2 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x04,0x00,0x00] + +v_cmpx_u_f32_e64 s105, s105 +// GFX12: v_cmpx_u_f32_e64 s105, s105 ; encoding: [0x7e,0x00,0x98,0xd4,0x69,0xd2,0x00,0x00] + +v_cmpx_u_f32_e64 vcc_lo, ttmp15 +// GFX12: v_cmpx_u_f32_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x98,0xd4,0x6a,0xf6,0x00,0x00] + +v_cmpx_u_f32_e64 vcc_hi, 0xaf123456 +// GFX12: v_cmpx_u_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0x98,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_u_f32_e64 ttmp15, src_scc +// GFX12: v_cmpx_u_f32_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x98,0xd4,0x7b,0xfa,0x01,0x00] + +v_cmpx_u_f32_e64 m0, 0.5 +// GFX12: v_cmpx_u_f32_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x98,0xd4,0x7d,0xe0,0x01,0x00] + +v_cmpx_u_f32_e64 exec_lo, -1 +// GFX12: v_cmpx_u_f32_e64 exec_lo, -1 ; encoding: [0x7e,0x00,0x98,0xd4,0x7e,0x82,0x01,0x00] + +v_cmpx_u_f32_e64 |exec_hi|, null +// GFX12: v_cmpx_u_f32_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x98,0xd4,0x7f,0xf8,0x00,0x00] + +v_cmpx_u_f32_e64 null, exec_lo +// GFX12: v_cmpx_u_f32_e64 null, exec_lo ; encoding: [0x7e,0x00,0x98,0xd4,0x7c,0xfc,0x00,0x00] + +v_cmpx_u_f32_e64 -1, exec_hi +// GFX12: v_cmpx_u_f32_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x98,0xd4,0xc1,0xfe,0x00,0x00] + +v_cmpx_u_f32_e64 0.5, -m0 +// GFX12: v_cmpx_u_f32_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x98,0xd4,0xf0,0xfa,0x00,0x40] + +v_cmpx_u_f32_e64 -src_scc, |vcc_lo| +// GFX12: v_cmpx_u_f32_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x98,0xd4,0xfd,0xd4,0x00,0x20] + +v_cmpx_u_f32_e64 -|0xaf123456|, -|vcc_hi| clamp +// GFX12: v_cmpx_u_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x98,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmpx_u_f64_e64 v[1:2], v[2:3] +// GFX12: v_cmpx_u_f64_e64 v[1:2], v[2:3] ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_u_f64_e64 v[254:255], v[254:255] +// GFX12: v_cmpx_u_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa8,0xd4,0xfe,0xfd,0x03,0x00] + +v_cmpx_u_f64_e64 s[2:3], s[4:5] +// GFX12: v_cmpx_u_f64_e64 s[2:3], s[4:5] ; encoding: [0x7e,0x00,0xa8,0xd4,0x02,0x08,0x00,0x00] + +v_cmpx_u_f64_e64 s[104:105], s[104:105] +// GFX12: v_cmpx_u_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa8,0xd4,0x68,0xd0,0x00,0x00] + +v_cmpx_u_f64_e64 vcc, ttmp[14:15] +// GFX12: v_cmpx_u_f64_e64 vcc, ttmp[14:15] ; encoding: [0x7e,0x00,0xa8,0xd4,0x6a,0xf4,0x00,0x00] + +v_cmpx_u_f64_e64 ttmp[14:15], 0xaf123456 +// GFX12: v_cmpx_u_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa8,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmpx_u_f64_e64 -|exec|, src_scc +// GFX12: v_cmpx_u_f64_e64 -|exec|, src_scc ; encoding: [0x7e,0x01,0xa8,0xd4,0x7e,0xfa,0x01,0x20] + +v_cmpx_u_f64_e64 null, 0.5 +// GFX12: v_cmpx_u_f64_e64 null, 0.5 ; encoding: [0x7e,0x00,0xa8,0xd4,0x7c,0xe0,0x01,0x00] + +v_cmpx_u_f64_e64 -1, -1 +// GFX12: v_cmpx_u_f64_e64 -1, -1 ; encoding: [0x7e,0x00,0xa8,0xd4,0xc1,0x82,0x01,0x00] + +v_cmpx_u_f64_e64 0.5, null +// GFX12: v_cmpx_u_f64_e64 0.5, null ; encoding: [0x7e,0x00,0xa8,0xd4,0xf0,0xf8,0x00,0x00] + +v_cmpx_u_f64_e64 -|src_scc|, -|exec| +// GFX12: v_cmpx_u_f64_e64 -|src_scc|, -|exec| ; encoding: [0x7e,0x03,0xa8,0xd4,0xfd,0xfc,0x00,0x60] + +v_cmpx_u_f64_e64 0xaf123456, -|vcc| clamp +// GFX12: v_cmpx_u_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa8,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s index 5772fb30391f8..476ea846f603a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s v_cmpx_class_f16_e64 v1, v2 // GFX12: v_cmpx_class_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16-fake16.s new file mode 100644 index 0000000000000..8a586de63b508 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16-fake16.s @@ -0,0 +1,2595 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_class_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_class_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_class_f16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_class_f16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_class_f16_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_class_f16_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x01,0xfd,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30] + +v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_class_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_class_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_class_f32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_class_f32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_class_f32_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_class_f32_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x01,0xfe,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30] + +v_cmpx_eq_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_eq_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_eq_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_eq_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_eq_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_eq_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_eq_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_eq_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x82,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_eq_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x82,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_eq_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_eq_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x82,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_eq_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_eq_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_eq_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_eq_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_eq_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_eq_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_eq_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_eq_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x92,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_eq_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x92,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x92,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_eq_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_eq_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_eq_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_eq_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_eq_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_eq_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_eq_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_eq_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ge_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x86,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x86,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ge_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ge_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ge_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ge_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ge_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ge_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ge_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x96,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ge_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x96,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x96,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_ge_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ge_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_ge_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_ge_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ge_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_ge_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_ge_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ge_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_ge_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_ge_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ge_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_gt_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_gt_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_gt_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_gt_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_gt_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_gt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_gt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x84,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_gt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x84,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_gt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_gt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x84,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_gt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_gt_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_gt_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_gt_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_gt_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_gt_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_gt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_gt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x94,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_gt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x94,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_gt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_gt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x94,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_gt_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_gt_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_gt_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_gt_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_gt_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_gt_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_gt_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_gt_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_gt_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_gt_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_gt_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_gt_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_le_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_le_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_le_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_le_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_le_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_le_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_le_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_le_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_le_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_le_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_le_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_le_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x83,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_le_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_le_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x83,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_le_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_le_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x83,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_le_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_le_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_le_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_le_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_le_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_le_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_le_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_le_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_le_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_le_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_le_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_le_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x93,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_le_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_le_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x93,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_le_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_le_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x93,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_le_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_le_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_le_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_le_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_le_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_le_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_le_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_le_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_le_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_le_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_le_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_le_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_le_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_le_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_le_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_le_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_le_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_le_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_le_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_le_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_lg_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lg_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lg_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_lg_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_lg_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_lg_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_lg_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_lg_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_lg_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_lg_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_lg_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_lg_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_lg_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lg_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x85,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_lg_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lg_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x85,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_lg_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lg_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x85,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_lg_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lg_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lg_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_lg_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_lg_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_lg_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_lg_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_lg_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_lg_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_lg_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_lg_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_lg_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_lg_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lg_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x95,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_lg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x95,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x95,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_lt_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_lt_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_lt_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_lt_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_lt_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_lt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x91,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x91,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_lt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x91,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_lt_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_lt_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lt_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_lt_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_lt_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lt_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_lt_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_lt_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lt_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_lt_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_lt_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lt_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_ne_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_ne_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ne_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_ne_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_ne_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ne_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_ne_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_ne_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ne_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_ne_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cmpx_ne_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ne_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cmpx_neq_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_neq_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_neq_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_neq_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_neq_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_neq_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_neq_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_neq_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_neq_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_neq_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_neq_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_neq_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_neq_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_neq_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x8d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_neq_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_neq_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x8d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_neq_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_neq_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x8d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_neq_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_neq_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_neq_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_neq_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_neq_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_neq_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_neq_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_neq_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_neq_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_neq_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_neq_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_neq_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_neq_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_neq_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x9d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_neq_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_neq_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x9d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_neq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_neq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x9d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_nge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nge_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nge_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_nge_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_nge_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_nge_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_nge_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_nge_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_nge_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_nge_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_nge_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_nge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_nge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_nge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x89,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_nge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_nge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x89,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_nge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_nge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x89,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_nge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nge_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nge_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_nge_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_nge_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_nge_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_nge_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_nge_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_nge_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_nge_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_nge_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_nge_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_nge_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_nge_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x99,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_nge_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_nge_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x99,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_nge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_nge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x99,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_ngt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ngt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ngt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x8b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_ngt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ngt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x8b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_ngt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ngt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x8b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_ngt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_ngt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_ngt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x9b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_ngt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_ngt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x9b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_ngt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_ngt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x9b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_nle_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nle_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nle_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_nle_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_nle_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_nle_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_nle_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_nle_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_nle_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_nle_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_nle_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_nle_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_nle_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_nle_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x8c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_nle_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_nle_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x8c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_nle_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_nle_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x8c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_nle_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nle_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nle_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_nle_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_nle_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_nle_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_nle_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_nle_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_nle_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_nle_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_nle_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_nle_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_nle_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_nle_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x9c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_nle_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_nle_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x9c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_nle_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_nle_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x9c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_nlg_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_nlg_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_nlg_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x8a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_nlg_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_nlg_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x8a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_nlg_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_nlg_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x8a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_nlg_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_nlg_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_nlg_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x9a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_nlg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_nlg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x9a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_nlg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_nlg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x9a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_nlt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_nlt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_nlt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x8e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_nlt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_nlt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x8e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_nlt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_nlt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x8e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_nlt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_nlt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_nlt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x9e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_nlt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_nlt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x9e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_nlt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_nlt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x9e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_o_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_o_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_o_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_o_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_o_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_o_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_o_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_o_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_o_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_o_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_o_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_o_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_o_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_o_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x87,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_o_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_o_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x87,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_o_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_o_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x87,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_o_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_o_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_o_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_o_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_o_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_o_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_o_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_o_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_o_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_o_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_o_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_o_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_o_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_o_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x97,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_o_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_o_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x97,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_o_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_o_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x97,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_u_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_u_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_u_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_u_f16_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_u_f16_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_u_f16_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_u_f16_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_u_f16_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_u_f16_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_u_f16_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_u_f16_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_u_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_u_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_u_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x88,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_u_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_u_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x88,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_u_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_u_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x88,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_u_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_u_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_u_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cmpx_u_f32_e64_dpp v1, v2 row_mirror +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cmpx_u_f32_e64_dpp v1, v2 row_half_mirror +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cmpx_u_f32_e64_dpp v1, v2 row_shl:1 +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cmpx_u_f32_e64_dpp v1, v2 row_shl:15 +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cmpx_u_f32_e64_dpp v1, v2 row_shr:1 +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cmpx_u_f32_e64_dpp v1, v2 row_shr:15 +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cmpx_u_f32_e64_dpp v1, v2 row_ror:1 +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cmpx_u_f32_e64_dpp v1, v2 row_ror:15 +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cmpx_u_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cmpx_u_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_u_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x98,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_u_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_u_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x98,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_u_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_u_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x98,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s index 57f89d3e31e7c..a6953ecc1d78a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8-fake16.s new file mode 100644 index 0000000000000..0b2383456b6e3 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8-fake16.s @@ -0,0 +1,897 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_class_f16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_class_f16_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xea,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_class_f16_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x01,0xfd,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] + +v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfe,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_class_f32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfe,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_class_f32_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfe,0xd4,0xea,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_class_f32_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x01,0xfe,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] + +v_cmpx_eq_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x82,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x82,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x82,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x82,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_eq_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x82,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_eq_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x92,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x92,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x92,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x92,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x92,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x92,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_eq_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc2,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_eq_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc2,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_eq_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xca,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_eq_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_eq_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xca,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_ge_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x86,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x86,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_ge_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x96,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x96,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x96,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x96,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x96,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x96,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_ge_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ge_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc6,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ge_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc6,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ge_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xce,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ge_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ge_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xce,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_gt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x84,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x84,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x84,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x84,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_gt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x84,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_gt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x94,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x94,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x94,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x94,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x94,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_gt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x94,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_gt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc4,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_gt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc4,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_gt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcc,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_gt_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_gt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xcc,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_le_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x83,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_le_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x83,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_le_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x83,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_le_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x83,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_le_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_le_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x83,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_le_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x93,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x93,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_le_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x93,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_le_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x93,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_le_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x93,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_le_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_le_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x93,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_le_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc3,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_le_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc3,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_le_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcb,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_le_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_le_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xcb,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_lg_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x85,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lg_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x85,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lg_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x85,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lg_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x85,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lg_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x85,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_lg_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x95,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x95,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x95,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lg_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x95,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x95,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x95,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x81,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x91,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x91,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x91,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x91,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x91,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x91,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc1,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc1,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc9,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc9,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ne_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc5,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ne_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc5,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_ne_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ne_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ne_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ne_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xcd,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cmpx_neq_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_neq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_neq_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_neq_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8d,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_neq_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8d,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_neq_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x8d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_neq_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_neq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_neq_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_neq_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9d,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_neq_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9d,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_neq_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x9d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_nge_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x89,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nge_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x89,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nge_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x89,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nge_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x89,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_nge_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x89,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_nge_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x99,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x99,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nge_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x99,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nge_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x99,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nge_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x99,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_nge_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x99,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_ngt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ngt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ngt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ngt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8b,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ngt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8b,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ngt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x8b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_ngt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ngt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ngt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ngt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9b,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_ngt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9b,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_ngt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x9b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_nle_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nle_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nle_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nle_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8c,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nle_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8c,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_nle_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x8c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_nle_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nle_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nle_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nle_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9c,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nle_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9c,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_nle_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x9c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_nlg_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlg_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlg_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8a,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlg_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8a,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_nlg_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x8a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_nlg_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlg_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9a,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9a,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_nlg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x9a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_nlt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8e,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8e,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_nlt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x8e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_nlt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9e,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_nlt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9e,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_nlt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x9e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_o_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_o_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_o_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x87,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_o_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_o_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x87,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_o_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_o_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x87,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_o_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_o_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x87,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_o_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_o_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x87,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_o_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x97,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_o_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_o_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x97,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_o_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_o_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x97,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_o_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_o_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x97,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_o_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_o_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x97,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_o_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_o_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x97,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_u_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_u_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_u_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x88,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_u_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_u_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x88,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_u_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_u_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x88,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_u_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_u_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x88,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_u_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_u_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x88,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_u_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x98,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_u_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_u_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x98,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_u_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_u_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x98,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_u_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_u_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x98,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_u_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_u_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x98,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_u_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_u_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x98,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s index d69a17e76d555..8e2899086f2bd 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] From 222ff186087f0f1d9976ac2512dca52d90e13472 Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Thu, 9 Jan 2025 13:40:13 -0500 Subject: [PATCH 28/79] [AMDGPU][True16][CodeGen] Update codegen pattern for v_med3_f16 (#121992) true16 codegen pattern for v_med3_f16 --- llvm/lib/Target/AMDGPU/SIInstructions.td | 2 + llvm/test/CodeGen/AMDGPU/fmed3.ll | 180 ++++++++++++++++++----- 2 files changed, 147 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index ee83dff227a85..4325ab448e581 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3670,6 +3670,8 @@ defm : FPMed3Pat; let SubtargetPredicate = HasMed3_16 in { let True16Predicate = NotHasTrue16BitInsts in defm : FPMed3Pat; +let True16Predicate = UseRealTrue16Insts in +defm : FPMed3Pat; let True16Predicate = UseFakeTrue16Insts in defm : FPMed3Pat; } diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll index 1cdcf276c526b..f490ecf68d984 100644 --- a/llvm/test/CodeGen/AMDGPU/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll @@ -5,8 +5,10 @@ ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { ; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32: @@ -7531,19 +7533,61 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %o ; GFX9-NEXT: global_store_short v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm ; -; GFX11-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 -; GFX11-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 -; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] -; GFX11-NEXT: s_endpgm +; GFX11-SDAG-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: +; GFX11-SDAG-FAKE16: ; %bb.0: +; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 +; GFX11-SDAG-FAKE16-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 +; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX11-GISEL-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: +; GFX11-GISEL-FAKE16: ; %bb.0: +; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 +; GFX11-GISEL-FAKE16-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX11-SDAG-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: +; GFX11-SDAG-TRUE16: ; %bb.0: +; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, 2.0, 4.0 +; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-SDAG-TRUE16-NEXT: s_endpgm +; +; GFX11-GISEL-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: +; GFX11-GISEL-TRUE16: ; %bb.0: +; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, 2.0, 4.0 +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] +; GFX11-GISEL-TRUE16-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid %outgep = getelementptr half, ptr addrspace(1) %out, i32 %tid @@ -7723,26 +7767,92 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, pt ; GFX9-NEXT: global_store_short v0, v1, s[8:9] ; GFX9-NEXT: s_endpgm ; -; GFX11-LABEL: v_nnan_inputs_med3_f16_pat0: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 -; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 -; GFX11-NEXT: v_add_f16_e32 v2, 2.0, v2 -; GFX11-NEXT: v_add_f16_e32 v3, 4.0, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_med3_f16 v1, v1, v2, v3 -; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] -; GFX11-NEXT: s_endpgm +; GFX11-SDAG-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0: +; GFX11-SDAG-FAKE16: ; %bb.0: +; GFX11-SDAG-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 +; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v2, 2.0, v2 +; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v3, 4.0, v3 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-FAKE16-NEXT: v_med3_f16 v1, v1, v2, v3 +; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX11-GISEL-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0: +; GFX11-GISEL-FAKE16: ; %bb.0: +; GFX11-GISEL-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 +; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v2, 2.0, v2 +; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v3, 4.0, v3 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-FAKE16-NEXT: v_med3_f16 v1, v1, v2, v3 +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX11-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX11-SDAG-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0: +; GFX11-SDAG-TRUE16: ; %bb.0: +; GFX11-SDAG-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v2, s[2:3] glc dlc +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v1, v2, s[4:5] glc dlc +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v3, v2, s[6:7] glc dlc +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v1.l, 4.0, v1.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, v0.h, v1.l +; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-SDAG-TRUE16-NEXT: s_endpgm +; +; GFX11-GISEL-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0: +; GFX11-GISEL-TRUE16: ; %bb.0: +; GFX11-GISEL-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v2, s[2:3] glc dlc +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v2, s[4:5] glc dlc +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v3, v2, s[6:7] glc dlc +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.h, 2.0, v1.l +; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v1.l, 4.0, v3.l +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, v0.h, v1.l +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX11-GISEL-TRUE16-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid %gep1 = getelementptr half, ptr addrspace(1) %bptr, i32 %tid From 031f33cca3c953dd09ac439fdb503fb3cb36af5e Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 9 Jan 2025 10:41:21 -0800 Subject: [PATCH 29/79] [RISCV] Add tests for legalization of and shuffles --- .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 234 ++++++++++++++++++ .../rvv/fixed-vectors-shuffle-exact-vlen.ll | 180 ++++++++++++++ 2 files changed, 414 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 0bd8466669dc8..8915603471ec7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -1141,3 +1141,237 @@ define <16 x i32> @shuffle_disjoint_lanes_one_splat(i32 %v, <16 x i32> %w) { %out = shufflevector <16 x i32> %splat, <16 x i32> %w, <16 x i32> ret <16 x i32> %out } + +define <4 x i128> @shuffle_i128(<4 x i128> %a) { +; RV32-LABEL: shuffle_i128: +; RV32: # %bb.0: +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a3, 4(a1) +; RV32-NEXT: lw a4, 8(a1) +; RV32-NEXT: lw a5, 12(a1) +; RV32-NEXT: lw a6, 48(a1) +; RV32-NEXT: lw a7, 52(a1) +; RV32-NEXT: lw t0, 56(a1) +; RV32-NEXT: lw t1, 60(a1) +; RV32-NEXT: lw t2, 32(a1) +; RV32-NEXT: lw t3, 36(a1) +; RV32-NEXT: lw t4, 40(a1) +; RV32-NEXT: lw a1, 44(a1) +; RV32-NEXT: sw t2, 48(a0) +; RV32-NEXT: sw t3, 52(a0) +; RV32-NEXT: sw t4, 56(a0) +; RV32-NEXT: sw a1, 60(a0) +; RV32-NEXT: sw a6, 32(a0) +; RV32-NEXT: sw a7, 36(a0) +; RV32-NEXT: sw t0, 40(a0) +; RV32-NEXT: sw t1, 44(a0) +; RV32-NEXT: sw a2, 16(a0) +; RV32-NEXT: sw a3, 20(a0) +; RV32-NEXT: sw a4, 24(a0) +; RV32-NEXT: sw a5, 28(a0) +; RV32-NEXT: sw a2, 0(a0) +; RV32-NEXT: sw a3, 4(a0) +; RV32-NEXT: sw a4, 8(a0) +; RV32-NEXT: sw a5, 12(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_i128: +; RV64: # %bb.0: +; RV64-NEXT: ld a2, 48(a1) +; RV64-NEXT: ld a3, 56(a1) +; RV64-NEXT: ld a4, 0(a1) +; RV64-NEXT: ld a5, 8(a1) +; RV64-NEXT: ld a6, 32(a1) +; RV64-NEXT: ld a1, 40(a1) +; RV64-NEXT: sd a2, 32(a0) +; RV64-NEXT: sd a3, 40(a0) +; RV64-NEXT: sd a6, 48(a0) +; RV64-NEXT: sd a1, 56(a0) +; RV64-NEXT: sd a4, 0(a0) +; RV64-NEXT: sd a5, 8(a0) +; RV64-NEXT: sd a4, 16(a0) +; RV64-NEXT: sd a5, 24(a0) +; RV64-NEXT: ret + %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> + ret <4 x i128> %res +} + +define void @shuffle_i128_ldst(ptr %p) { +; RV32-LABEL: shuffle_i128_ldst: +; RV32: # %bb.0: +; RV32-NEXT: lw a1, 48(a0) +; RV32-NEXT: lw a2, 52(a0) +; RV32-NEXT: lw a3, 56(a0) +; RV32-NEXT: lw a4, 60(a0) +; RV32-NEXT: lw a5, 0(a0) +; RV32-NEXT: lw a6, 4(a0) +; RV32-NEXT: lw a7, 8(a0) +; RV32-NEXT: lw t0, 12(a0) +; RV32-NEXT: lw t1, 32(a0) +; RV32-NEXT: lw t2, 36(a0) +; RV32-NEXT: lw t3, 40(a0) +; RV32-NEXT: lw t4, 44(a0) +; RV32-NEXT: sw t1, 48(a0) +; RV32-NEXT: sw t2, 52(a0) +; RV32-NEXT: sw t3, 56(a0) +; RV32-NEXT: sw t4, 60(a0) +; RV32-NEXT: sw a5, 16(a0) +; RV32-NEXT: sw a6, 20(a0) +; RV32-NEXT: sw a7, 24(a0) +; RV32-NEXT: sw t0, 28(a0) +; RV32-NEXT: sw a1, 32(a0) +; RV32-NEXT: sw a2, 36(a0) +; RV32-NEXT: sw a3, 40(a0) +; RV32-NEXT: sw a4, 44(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_i128_ldst: +; RV64: # %bb.0: +; RV64-NEXT: ld a1, 0(a0) +; RV64-NEXT: ld a2, 8(a0) +; RV64-NEXT: ld a3, 32(a0) +; RV64-NEXT: ld a4, 40(a0) +; RV64-NEXT: ld a5, 48(a0) +; RV64-NEXT: ld a6, 56(a0) +; RV64-NEXT: sd a3, 48(a0) +; RV64-NEXT: sd a4, 56(a0) +; RV64-NEXT: sd a1, 16(a0) +; RV64-NEXT: sd a2, 24(a0) +; RV64-NEXT: sd a5, 32(a0) +; RV64-NEXT: sd a6, 40(a0) +; RV64-NEXT: ret + %a = load <4 x i128>, ptr %p + %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> + store <4 x i128> %res, ptr %p + ret void +} + +define void @shuffle_i256_ldst(ptr %p) { +; RV32-LABEL: shuffle_i256_ldst: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 32(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 16(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: .cfi_offset s1, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: .cfi_offset s3, -16 +; RV32-NEXT: .cfi_offset s4, -20 +; RV32-NEXT: .cfi_offset s5, -24 +; RV32-NEXT: .cfi_offset s6, -28 +; RV32-NEXT: .cfi_offset s7, -32 +; RV32-NEXT: .cfi_offset s8, -36 +; RV32-NEXT: .cfi_offset s9, -40 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a2, 4(a0) +; RV32-NEXT: lw a3, 8(a0) +; RV32-NEXT: lw a4, 12(a0) +; RV32-NEXT: lw a5, 16(a0) +; RV32-NEXT: lw a6, 20(a0) +; RV32-NEXT: lw a7, 24(a0) +; RV32-NEXT: lw t0, 28(a0) +; RV32-NEXT: lw t1, 96(a0) +; RV32-NEXT: lw t2, 100(a0) +; RV32-NEXT: lw t3, 104(a0) +; RV32-NEXT: lw t4, 108(a0) +; RV32-NEXT: lw t5, 112(a0) +; RV32-NEXT: lw t6, 116(a0) +; RV32-NEXT: lw s0, 120(a0) +; RV32-NEXT: lw s1, 124(a0) +; RV32-NEXT: lw s2, 64(a0) +; RV32-NEXT: lw s3, 68(a0) +; RV32-NEXT: lw s4, 72(a0) +; RV32-NEXT: lw s5, 76(a0) +; RV32-NEXT: lw s6, 80(a0) +; RV32-NEXT: lw s7, 84(a0) +; RV32-NEXT: lw s8, 88(a0) +; RV32-NEXT: lw s9, 92(a0) +; RV32-NEXT: sw s6, 112(a0) +; RV32-NEXT: sw s7, 116(a0) +; RV32-NEXT: sw s8, 120(a0) +; RV32-NEXT: sw s9, 124(a0) +; RV32-NEXT: sw s2, 96(a0) +; RV32-NEXT: sw s3, 100(a0) +; RV32-NEXT: sw s4, 104(a0) +; RV32-NEXT: sw s5, 108(a0) +; RV32-NEXT: sw t5, 80(a0) +; RV32-NEXT: sw t6, 84(a0) +; RV32-NEXT: sw s0, 88(a0) +; RV32-NEXT: sw s1, 92(a0) +; RV32-NEXT: sw t1, 64(a0) +; RV32-NEXT: sw t2, 68(a0) +; RV32-NEXT: sw t3, 72(a0) +; RV32-NEXT: sw t4, 76(a0) +; RV32-NEXT: sw a5, 48(a0) +; RV32-NEXT: sw a6, 52(a0) +; RV32-NEXT: sw a7, 56(a0) +; RV32-NEXT: sw t0, 60(a0) +; RV32-NEXT: sw a1, 32(a0) +; RV32-NEXT: sw a2, 36(a0) +; RV32-NEXT: sw a3, 40(a0) +; RV32-NEXT: sw a4, 44(a0) +; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 16(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore s1 +; RV32-NEXT: .cfi_restore s2 +; RV32-NEXT: .cfi_restore s3 +; RV32-NEXT: .cfi_restore s4 +; RV32-NEXT: .cfi_restore s5 +; RV32-NEXT: .cfi_restore s6 +; RV32-NEXT: .cfi_restore s7 +; RV32-NEXT: .cfi_restore s8 +; RV32-NEXT: .cfi_restore s9 +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_i256_ldst: +; RV64: # %bb.0: +; RV64-NEXT: ld a1, 96(a0) +; RV64-NEXT: ld a2, 104(a0) +; RV64-NEXT: ld a3, 112(a0) +; RV64-NEXT: ld a4, 120(a0) +; RV64-NEXT: ld a5, 0(a0) +; RV64-NEXT: ld a6, 8(a0) +; RV64-NEXT: ld a7, 16(a0) +; RV64-NEXT: ld t0, 24(a0) +; RV64-NEXT: ld t1, 64(a0) +; RV64-NEXT: ld t2, 72(a0) +; RV64-NEXT: ld t3, 80(a0) +; RV64-NEXT: ld t4, 88(a0) +; RV64-NEXT: sd t1, 96(a0) +; RV64-NEXT: sd t2, 104(a0) +; RV64-NEXT: sd t3, 112(a0) +; RV64-NEXT: sd t4, 120(a0) +; RV64-NEXT: sd a5, 32(a0) +; RV64-NEXT: sd a6, 40(a0) +; RV64-NEXT: sd a7, 48(a0) +; RV64-NEXT: sd t0, 56(a0) +; RV64-NEXT: sd a1, 64(a0) +; RV64-NEXT: sd a2, 72(a0) +; RV64-NEXT: sd a3, 80(a0) +; RV64-NEXT: sd a4, 88(a0) +; RV64-NEXT: ret + %a = load <4 x i256>, ptr %p + %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> + store <4 x i256> %res, ptr %p + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll index bb05eb5368ae9..4603c0d24f5d7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll @@ -400,3 +400,183 @@ entry: %conv199 = sext i32 %4 to i64 ret i64 %conv199 } + +define void @shuffle_i128_ldst(ptr %p) vscale_range(2,2) { +; RV32-LABEL: shuffle_i128_ldst: +; RV32: # %bb.0: +; RV32-NEXT: lw a1, 48(a0) +; RV32-NEXT: lw a2, 52(a0) +; RV32-NEXT: lw a3, 56(a0) +; RV32-NEXT: lw a4, 60(a0) +; RV32-NEXT: lw a5, 0(a0) +; RV32-NEXT: lw a6, 4(a0) +; RV32-NEXT: lw a7, 8(a0) +; RV32-NEXT: lw t0, 12(a0) +; RV32-NEXT: lw t1, 32(a0) +; RV32-NEXT: lw t2, 36(a0) +; RV32-NEXT: lw t3, 40(a0) +; RV32-NEXT: lw t4, 44(a0) +; RV32-NEXT: sw t1, 48(a0) +; RV32-NEXT: sw t2, 52(a0) +; RV32-NEXT: sw t3, 56(a0) +; RV32-NEXT: sw t4, 60(a0) +; RV32-NEXT: sw a5, 16(a0) +; RV32-NEXT: sw a6, 20(a0) +; RV32-NEXT: sw a7, 24(a0) +; RV32-NEXT: sw t0, 28(a0) +; RV32-NEXT: sw a1, 32(a0) +; RV32-NEXT: sw a2, 36(a0) +; RV32-NEXT: sw a3, 40(a0) +; RV32-NEXT: sw a4, 44(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_i128_ldst: +; RV64: # %bb.0: +; RV64-NEXT: ld a1, 0(a0) +; RV64-NEXT: ld a2, 8(a0) +; RV64-NEXT: ld a3, 32(a0) +; RV64-NEXT: ld a4, 40(a0) +; RV64-NEXT: ld a5, 48(a0) +; RV64-NEXT: ld a6, 56(a0) +; RV64-NEXT: sd a3, 48(a0) +; RV64-NEXT: sd a4, 56(a0) +; RV64-NEXT: sd a1, 16(a0) +; RV64-NEXT: sd a2, 24(a0) +; RV64-NEXT: sd a5, 32(a0) +; RV64-NEXT: sd a6, 40(a0) +; RV64-NEXT: ret + %a = load <4 x i128>, ptr %p + %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> + store <4 x i128> %res, ptr %p + ret void +} + +define void @shuffle_i256_ldst(ptr %p) vscale_range(2,2) { +; RV32-LABEL: shuffle_i256_ldst: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 32(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 16(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: .cfi_offset s1, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: .cfi_offset s3, -16 +; RV32-NEXT: .cfi_offset s4, -20 +; RV32-NEXT: .cfi_offset s5, -24 +; RV32-NEXT: .cfi_offset s6, -28 +; RV32-NEXT: .cfi_offset s7, -32 +; RV32-NEXT: .cfi_offset s8, -36 +; RV32-NEXT: .cfi_offset s9, -40 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a2, 4(a0) +; RV32-NEXT: lw a3, 8(a0) +; RV32-NEXT: lw a4, 12(a0) +; RV32-NEXT: lw a5, 16(a0) +; RV32-NEXT: lw a6, 20(a0) +; RV32-NEXT: lw a7, 24(a0) +; RV32-NEXT: lw t0, 28(a0) +; RV32-NEXT: lw t1, 96(a0) +; RV32-NEXT: lw t2, 100(a0) +; RV32-NEXT: lw t3, 104(a0) +; RV32-NEXT: lw t4, 108(a0) +; RV32-NEXT: lw t5, 112(a0) +; RV32-NEXT: lw t6, 116(a0) +; RV32-NEXT: lw s0, 120(a0) +; RV32-NEXT: lw s1, 124(a0) +; RV32-NEXT: lw s2, 64(a0) +; RV32-NEXT: lw s3, 68(a0) +; RV32-NEXT: lw s4, 72(a0) +; RV32-NEXT: lw s5, 76(a0) +; RV32-NEXT: lw s6, 80(a0) +; RV32-NEXT: lw s7, 84(a0) +; RV32-NEXT: lw s8, 88(a0) +; RV32-NEXT: lw s9, 92(a0) +; RV32-NEXT: sw s6, 112(a0) +; RV32-NEXT: sw s7, 116(a0) +; RV32-NEXT: sw s8, 120(a0) +; RV32-NEXT: sw s9, 124(a0) +; RV32-NEXT: sw s2, 96(a0) +; RV32-NEXT: sw s3, 100(a0) +; RV32-NEXT: sw s4, 104(a0) +; RV32-NEXT: sw s5, 108(a0) +; RV32-NEXT: sw t5, 80(a0) +; RV32-NEXT: sw t6, 84(a0) +; RV32-NEXT: sw s0, 88(a0) +; RV32-NEXT: sw s1, 92(a0) +; RV32-NEXT: sw t1, 64(a0) +; RV32-NEXT: sw t2, 68(a0) +; RV32-NEXT: sw t3, 72(a0) +; RV32-NEXT: sw t4, 76(a0) +; RV32-NEXT: sw a5, 48(a0) +; RV32-NEXT: sw a6, 52(a0) +; RV32-NEXT: sw a7, 56(a0) +; RV32-NEXT: sw t0, 60(a0) +; RV32-NEXT: sw a1, 32(a0) +; RV32-NEXT: sw a2, 36(a0) +; RV32-NEXT: sw a3, 40(a0) +; RV32-NEXT: sw a4, 44(a0) +; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 16(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore s1 +; RV32-NEXT: .cfi_restore s2 +; RV32-NEXT: .cfi_restore s3 +; RV32-NEXT: .cfi_restore s4 +; RV32-NEXT: .cfi_restore s5 +; RV32-NEXT: .cfi_restore s6 +; RV32-NEXT: .cfi_restore s7 +; RV32-NEXT: .cfi_restore s8 +; RV32-NEXT: .cfi_restore s9 +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_i256_ldst: +; RV64: # %bb.0: +; RV64-NEXT: ld a1, 96(a0) +; RV64-NEXT: ld a2, 104(a0) +; RV64-NEXT: ld a3, 112(a0) +; RV64-NEXT: ld a4, 120(a0) +; RV64-NEXT: ld a5, 0(a0) +; RV64-NEXT: ld a6, 8(a0) +; RV64-NEXT: ld a7, 16(a0) +; RV64-NEXT: ld t0, 24(a0) +; RV64-NEXT: ld t1, 64(a0) +; RV64-NEXT: ld t2, 72(a0) +; RV64-NEXT: ld t3, 80(a0) +; RV64-NEXT: ld t4, 88(a0) +; RV64-NEXT: sd t1, 96(a0) +; RV64-NEXT: sd t2, 104(a0) +; RV64-NEXT: sd t3, 112(a0) +; RV64-NEXT: sd t4, 120(a0) +; RV64-NEXT: sd a5, 32(a0) +; RV64-NEXT: sd a6, 40(a0) +; RV64-NEXT: sd a7, 48(a0) +; RV64-NEXT: sd t0, 56(a0) +; RV64-NEXT: sd a1, 64(a0) +; RV64-NEXT: sd a2, 72(a0) +; RV64-NEXT: sd a3, 80(a0) +; RV64-NEXT: sd a4, 88(a0) +; RV64-NEXT: ret + %a = load <4 x i256>, ptr %p + %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> + store <4 x i256> %res, ptr %p + ret void +} From c036a9a2c2130f6e80e2969d4a8fad96265e4a11 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 07:56:56 -0800 Subject: [PATCH 30/79] [RISCV][VLOPT] Add vector single width floating point add subtract instructions to isSupportedInstr --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 6 ++ .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 26 ++--- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 100 ++++++++++++++++++ 3 files changed, 116 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 2c04dd062670f..23a1cf392ec1e 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -983,6 +983,12 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VMSOF_M: case RISCV::VIOTA_M: case RISCV::VID_V: + // Vector Single-Width Floating-Point Add/Subtract Instructions + case RISCV::VFADD_VF: + case RISCV::VFADD_VV: + case RISCV::VFSUB_VF: + case RISCV::VFSUB_VV: + case RISCV::VFRSUB_VF: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 66952cac8e00d..ce23dd0eac203 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -93,12 +93,11 @@ define void @fadd_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -229,12 +228,11 @@ define void @fsub_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2330,13 +2328,12 @@ define void @fadd_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2472,13 +2469,12 @@ define void @fadd_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2614,13 +2610,12 @@ define void @fsub_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2756,13 +2751,12 @@ define void @fsub_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -5004,13 +4998,13 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -5181,13 +5175,13 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 55a50a15c788c..a53b837db5461 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -2925,3 +2925,103 @@ define @vid.v( %c, iXLen %vl) { %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %c, iXLen %vl) ret %2 } + +define @vfadd_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfadd_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfadd_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfadd_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfadd_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfadd_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfadd.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfsub_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfsub_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfsub.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfsub_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfsub.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfsub.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfsub_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfsub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfsub.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfsub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfsub.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfsub.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfrsub_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfrsub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfrsub.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfrsub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfrsub.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfrsub.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} From 8beb9d393deaafde5ce7323a71cd0b0b1553444d Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 08:22:16 -0800 Subject: [PATCH 31/79] [RISCV][VLOPT] Add vector widening floating point add subtract instructions to isSupportedInstr --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 9 + llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 168 +++++++++++++++++++ 2 files changed, 177 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 23a1cf392ec1e..018556d27d763 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -989,6 +989,15 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VFSUB_VF: case RISCV::VFSUB_VV: case RISCV::VFRSUB_VF: + // Vector Widening Floating-Point Add/Subtract Instructions + case RISCV::VFWADD_VV: + case RISCV::VFWADD_VF: + case RISCV::VFWSUB_VV: + case RISCV::VFWSUB_VF: + case RISCV::VFWADD_WF: + case RISCV::VFWADD_WV: + case RISCV::VFWSUB_WF: + case RISCV::VFWSUB_WV: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index a53b837db5461..804a8a614a820 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3025,3 +3025,171 @@ define @vfrsub_vx( %a, float %b, iXLen %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) ret %2 } + +define @vfwadd_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwadd_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.vf v12, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.vf v12, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.vf v12, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.vf v12, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwadd_wv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_wv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.wv v8, v8, v12 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_wv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.wv v8, v8, v12 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwadd_wf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_wf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.wf v8, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_wf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.wf v8, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_wv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_wv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.wv v8, v8, v12 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_wv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.wv v8, v8, v12 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_wf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_wf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.wf v8, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_wf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.wf v8, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} From a484fa1d0a974680d609d3f08ee6d57c9e3d21f8 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 08:28:59 -0800 Subject: [PATCH 32/79] [RISCV][VLOPT] Add floating point multiply divide instructions to getSupportedInstr --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 6 ++ .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 24 ++--- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 100 ++++++++++++++++++ 3 files changed, 114 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 018556d27d763..bf6a8068413ac 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -998,6 +998,12 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VFWADD_WV: case RISCV::VFWSUB_WF: case RISCV::VFWSUB_WV: + // Vector Single-Width Floating-Point Multiply/Divide Instructions + case RISCV::VFMUL_VF: + case RISCV::VFMUL_VV: + case RISCV::VFDIV_VF: + case RISCV::VFDIV_VV: + case RISCV::VFRDIV_VF: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index ce23dd0eac203..b8710a518287a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -363,12 +363,11 @@ define void @fmul_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -499,12 +498,11 @@ define void @fdiv_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2892,13 +2890,12 @@ define void @fmul_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -3034,13 +3031,12 @@ define void @fmul_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -3176,13 +3172,12 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -3318,13 +3313,12 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -4993,12 +4987,11 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vle16.v v10, (a2) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 @@ -5170,12 +5163,11 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vle16.v v10, (a2) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 804a8a614a820..bdacd17fe75c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3193,3 +3193,103 @@ define @vfwsub_wf( %a, float %b, iXLe %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) ret %2 } + +define @vfmul_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfmul_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfmul.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfmul_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfmul.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfmul.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfmul_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfmul_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfmul.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfmul_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfmul.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfmul.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfdiv_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfdiv_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfdiv.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfdiv_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfdiv.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfdiv.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfdiv_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfdiv_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfdiv.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfdiv_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfdiv.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfdiv.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfrdiv_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfrdiv_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfrdiv.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfrdiv_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfrdiv.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfrdiv.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} From b419edeec34844cfc31665248d670d777499340d Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 09:55:05 -0800 Subject: [PATCH 33/79] [RISCV][VLOPT] Add widening floating point multiply to isSupportedInstr --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 3 ++ llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 42 ++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index bf6a8068413ac..bc7d68fad22d0 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -1004,6 +1004,9 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VFDIV_VF: case RISCV::VFDIV_VV: case RISCV::VFRDIV_VF: + // Vector Widening Floating-Point Multiply + case RISCV::VFWMUL_VF: + case RISCV::VFWMUL_VV: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index bdacd17fe75c0..46cca43a1be89 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3293,3 +3293,45 @@ define @vfrdiv_vf( %a, float %b, iXLen %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) ret %2 } + +define @vfwmul_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwmul_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwmul.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwmul_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwmul.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwmul_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwmul_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwmul.vf v12, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwmul_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwmul.vf v12, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} From 5f70fea79fdb947ab7820aa47e5a1885691ef73a Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 10:01:15 -0800 Subject: [PATCH 34/79] [RISCV][VLOPT] Add Vector Floating-Point Compare Instructions to getSupportedInstr --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 11 + llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 200 +++++++++++++++++++ 2 files changed, 211 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index bc7d68fad22d0..6661921d66f95 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -1007,6 +1007,17 @@ static bool isSupportedInstr(const MachineInstr &MI) { // Vector Widening Floating-Point Multiply case RISCV::VFWMUL_VF: case RISCV::VFWMUL_VV: + // Vector Floating-Point Compare Instructions + case RISCV::VMFEQ_VF: + case RISCV::VMFEQ_VV: + case RISCV::VMFNE_VF: + case RISCV::VMFNE_VV: + case RISCV::VMFLT_VF: + case RISCV::VMFLT_VV: + case RISCV::VMFLE_VF: + case RISCV::VMFLE_VV: + case RISCV::VMFGT_VF: + case RISCV::VMFGE_VF: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 46cca43a1be89..46fbba35c35a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3335,3 +3335,203 @@ define @vfwmul_vf( %a, float %b, iXLen %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) ret %2 } + +define @vmfeq_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfeq_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfeq.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfeq_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfeq.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfeq.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfeq_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfeq_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfeq.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfeq_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfeq.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfeq.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfne_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfne_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfne.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfne_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfne.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfne.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfne_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfne_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfne.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfne_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfne.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfne.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmflt_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmflt_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmflt.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmflt_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmflt.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmflt.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmflt_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmflt_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmflt.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmflt_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmflt.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmflt.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfle_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfle_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfle.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfle_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfle.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfle.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfle_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfle_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfle.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfle_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfle.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfle.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfgt_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfgt_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfgt.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfgt_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfgt.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfgt.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfgt_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfgt_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmflt.vv v12, v10, v8 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfgt_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmflt.vv v12, v10, v8 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfgt.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} From ca10deaa72c37fb5eee8bf1c95466b12a8773e95 Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Thu, 9 Jan 2025 13:53:59 -0500 Subject: [PATCH 35/79] [z/OS] Add backtrace support for z/OS. (#121826) The system call `__CELQTBCK()` is used to build a backtrace like on other systems. The collected information are the address of the PC, the address of the entry point (EP), the difference between both addresses (+EP), the dynamic storage area (DSA aka the stack pointer), and the function name. The system call is described here: https://www.ibm.com/docs/en/zos/3.1.0?topic=cwicsa6a-celqtbck-also-known-as-celqtbck-64-bit-traceback-service --- llvm/lib/Support/Unix/Signals.inc | 78 +++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc index 088ca33e3c8c5..50d6248ba6af8 100644 --- a/llvm/lib/Support/Unix/Signals.inc +++ b/llvm/lib/Support/Unix/Signals.inc @@ -79,6 +79,10 @@ #undef HAVE__UNWIND_BACKTRACE #endif #endif +#if ENABLE_BACKTRACES && defined(__MVS__) +#include "llvm/Support/ConvertEBCDIC.h" +#include <__le_cwi.h> +#endif using namespace llvm; @@ -708,6 +712,76 @@ static int unwindBacktrace(void **StackTrace, int MaxEntries) { } #endif +#if ENABLE_BACKTRACES && defined(__MVS__) +static void zosbacktrace(raw_ostream &OS) { + // A function name in the PPA1 can have length 16k. + constexpr size_t MAX_ENTRY_NAME = UINT16_MAX; + // Limit all other strings to 8 byte. + constexpr size_t MAX_OTHER = 8; + int32_t dsa_format = -1; // Input/Output + void *caaptr = _gtca(); // Input + int32_t member_id; // Output + char compile_unit_name[MAX_OTHER]; // Output + void *compile_unit_address; // Output + void *call_instruction_address = nullptr; // Input/Output + char entry_name[MAX_ENTRY_NAME]; // Output + void *entry_address; // Output + void *callers_instruction_address; // Output + void *callers_dsaptr; // Output + int32_t callers_dsa_format; // Output + char statement_id[MAX_OTHER]; // Output + void *cibptr; // Output + int32_t main_program; // Output + _FEEDBACK fc; // Output + + // The DSA pointer is the value of the stack pointer r4. + // __builtin_frame_address() returns a pointer to the stack frame, so the + // stack bias has to be considered to get the expected DSA value. + void *dsaptr = static_cast(__builtin_frame_address(0)) - 2048; + int count = 0; + OS << " DSA Adr EP +EP DSA " + " Entry\n"; + while (1) { + // After the call, these variables contain the length of the string. + int32_t compile_unit_name_length = sizeof(compile_unit_name); + int32_t entry_name_length = sizeof(entry_name); + int32_t statement_id_length = sizeof(statement_id); + // See + // https://www.ibm.com/docs/en/zos/3.1.0?topic=cwicsa6a-celqtbck-also-known-as-celqtbck-64-bit-traceback-service + // for documentation of the parameters. + __CELQTBCK(&dsaptr, &dsa_format, &caaptr, &member_id, &compile_unit_name[0], + &compile_unit_name_length, &compile_unit_address, + &call_instruction_address, &entry_name[0], &entry_name_length, + &entry_address, &callers_instruction_address, &callers_dsaptr, + &callers_dsa_format, &statement_id[0], &statement_id_length, + &cibptr, &main_program, &fc); + if (fc.tok_sev) { + OS << format("error: CELQTBCK returned severity %d message %d\n", + fc.tok_sev, fc.tok_msgno); + break; + } + + if (count) { // Omit first entry. + uintptr_t diff = reinterpret_cast(call_instruction_address) - + reinterpret_cast(entry_address); + OS << format(" %3d. 0x%016lX", count, call_instruction_address); + OS << format(" 0x%016lX +0x%08lX 0x%016lX", entry_address, diff, dsaptr); + SmallString<256> Str; + ConverterEBCDIC::convertToUTF8(StringRef(entry_name, entry_name_length), + Str); + OS << ' ' << Str << '\n'; + } + ++count; + if (callers_dsaptr) { + dsaptr = callers_dsaptr; + dsa_format = callers_dsa_format; + call_instruction_address = callers_instruction_address; + } else + break; + } +} +#endif + // In the case of a program crash or fault, print out a stack trace so that the // user has an indication of why and where we died. // @@ -715,6 +789,9 @@ static int unwindBacktrace(void **StackTrace, int MaxEntries) { // doesn't demangle symbols. void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) { #if ENABLE_BACKTRACES +#ifdef __MVS__ + zosbacktrace(OS); +#else static void *StackTrace[256]; int depth = 0; #if defined(HAVE_BACKTRACE) @@ -791,6 +868,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) { backtrace_symbols_fd(StackTrace, Depth, STDERR_FILENO); #endif #endif +#endif } static void PrintStackTraceSignalHandler(void *) { From c123d0c1df3363f309a2c0b7837297d3790d6ea5 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Thu, 9 Jan 2025 11:01:44 -0800 Subject: [PATCH 36/79] [HLSL][NFC] Move packoffset validation to separate function and calculate offsets in bytes (#121989) There will be more changes coming in to `SemaHLSL::ActOnFinishBuffer` so it would be good to move the packoffset validation out to a separate function. This change also unifies the units for cbuffer offset calculations to bytes. --- clang/include/clang/Basic/Attr.td | 6 +-- clang/lib/Sema/SemaHLSL.cpp | 80 ++++++++++++++++++------------- 2 files changed, 49 insertions(+), 37 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 12faf06597008..e51a74655dd5e 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -4711,9 +4711,9 @@ def HLSLPackOffset: HLSLAnnotationAttr { let Args = [IntArgument<"Subcomponent">, IntArgument<"Component">]; let Documentation = [HLSLPackOffsetDocs]; let AdditionalMembers = [{ - unsigned getOffset() { - return subcomponent * 4 + component; - } + unsigned getOffsetInBytes() { + return subcomponent * 16 + component * 4; + } }]; } diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 64b6fe4cd5eb4..65ddee05a2151 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -164,18 +164,20 @@ Decl *SemaHLSL::ActOnStartBuffer(Scope *BufferScope, bool CBuffer, return Result; } -// Calculate the size of a legacy cbuffer type based on +// Calculate the size of a legacy cbuffer type in bytes based on // https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules static unsigned calculateLegacyCbufferSize(const ASTContext &Context, QualType T) { unsigned Size = 0; - constexpr unsigned CBufferAlign = 128; + constexpr unsigned CBufferAlign = 16; if (const RecordType *RT = T->getAs()) { const RecordDecl *RD = RT->getDecl(); for (const FieldDecl *Field : RD->fields()) { QualType Ty = Field->getType(); unsigned FieldSize = calculateLegacyCbufferSize(Context, Ty); - unsigned FieldAlign = 32; + // FIXME: This is not the correct alignment, it does not work for 16-bit + // types. See llvm/llvm-project#119641. + unsigned FieldAlign = 4; if (Ty->isAggregateType()) FieldAlign = CBufferAlign; Size = llvm::alignTo(Size, FieldAlign); @@ -194,17 +196,19 @@ static unsigned calculateLegacyCbufferSize(const ASTContext &Context, calculateLegacyCbufferSize(Context, VT->getElementType()); Size = ElementSize * ElementCount; } else { - Size = Context.getTypeSize(T); + Size = Context.getTypeSize(T) / 8; } return Size; } -void SemaHLSL::ActOnFinishBuffer(Decl *Dcl, SourceLocation RBrace) { - auto *BufDecl = cast(Dcl); - BufDecl->setRBraceLoc(RBrace); - - // Validate packoffset. +// Validate packoffset: +// - if packoffset it used it must be set on all declarations inside the buffer +// - packoffset ranges must not overlap +static void validatePackoffset(Sema &S, HLSLBufferDecl *BufDecl) { llvm::SmallVector> PackOffsetVec; + + // Make sure the packoffset annotations are either on all declarations + // or on none. bool HasPackOffset = false; bool HasNonPackOffset = false; for (auto *Field : BufDecl->decls()) { @@ -219,33 +223,41 @@ void SemaHLSL::ActOnFinishBuffer(Decl *Dcl, SourceLocation RBrace) { } } - if (HasPackOffset && HasNonPackOffset) - Diag(BufDecl->getLocation(), diag::warn_hlsl_packoffset_mix); - - if (HasPackOffset) { - ASTContext &Context = getASTContext(); - // Make sure no overlap in packoffset. - // Sort PackOffsetVec by offset. - std::sort(PackOffsetVec.begin(), PackOffsetVec.end(), - [](const std::pair &LHS, - const std::pair &RHS) { - return LHS.second->getOffset() < RHS.second->getOffset(); - }); - - for (unsigned i = 0; i < PackOffsetVec.size() - 1; i++) { - VarDecl *Var = PackOffsetVec[i].first; - HLSLPackOffsetAttr *Attr = PackOffsetVec[i].second; - unsigned Size = calculateLegacyCbufferSize(Context, Var->getType()); - unsigned Begin = Attr->getOffset() * 32; - unsigned End = Begin + Size; - unsigned NextBegin = PackOffsetVec[i + 1].second->getOffset() * 32; - if (End > NextBegin) { - VarDecl *NextVar = PackOffsetVec[i + 1].first; - Diag(NextVar->getLocation(), diag::err_hlsl_packoffset_overlap) - << NextVar << Var; - } + if (!HasPackOffset) + return; + + if (HasNonPackOffset) + S.Diag(BufDecl->getLocation(), diag::warn_hlsl_packoffset_mix); + + // Make sure there is no overlap in packoffset - sort PackOffsetVec by offset + // and compare adjacent values. + ASTContext &Context = S.getASTContext(); + std::sort(PackOffsetVec.begin(), PackOffsetVec.end(), + [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.second->getOffsetInBytes() < + RHS.second->getOffsetInBytes(); + }); + for (unsigned i = 0; i < PackOffsetVec.size() - 1; i++) { + VarDecl *Var = PackOffsetVec[i].first; + HLSLPackOffsetAttr *Attr = PackOffsetVec[i].second; + unsigned Size = calculateLegacyCbufferSize(Context, Var->getType()); + unsigned Begin = Attr->getOffsetInBytes(); + unsigned End = Begin + Size; + unsigned NextBegin = PackOffsetVec[i + 1].second->getOffsetInBytes(); + if (End > NextBegin) { + VarDecl *NextVar = PackOffsetVec[i + 1].first; + S.Diag(NextVar->getLocation(), diag::err_hlsl_packoffset_overlap) + << NextVar << Var; } } +} + +void SemaHLSL::ActOnFinishBuffer(Decl *Dcl, SourceLocation RBrace) { + auto *BufDecl = cast(Dcl); + BufDecl->setRBraceLoc(RBrace); + + validatePackoffset(SemaRef, BufDecl); SemaRef.PopDeclContext(); } From 1c999072221371c9008cd0aec80e387777378de0 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Thu, 9 Jan 2025 14:05:27 -0500 Subject: [PATCH 37/79] [CUDA][HIP] Fix overriding of constexpr virtual function (#121986) In C++20 constexpr virtual function is allowed. In C++17 although non-pure virtual function is not allowed to be constexpr, pure virtual function is allowed to be constexpr and is allowed to be overriden by non-constexpr virtual function in the derived class. The following code compiles as C++: ``` class A { public: constexpr virtual int f() = 0; }; class B : public A { public: int f() override { return 42; } }; ``` However, it fails to compile as CUDA or HIP code. The reason: A::f() is implicitly host device function whereas B::f() is a host function. Since they have different targets, clang does not treat B::f() as an override of A::f(). Instead, it treats B::f() as a name-hiding non-virtual function for A::f(), and diagnoses it. This causes any CUDA/HIP program using C++ standard header file `` from g++-13 to fail to compile since such usage patten show up there: ``` /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/format:3564:34: error: non-virtual member function marked 'override' hides virtual member function 3564 | _M_format_arg(size_t __id) override | ^ /usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/format:3538:30: note: hidden overloaded virtual function 'std::__format::_Scanner::_M_format_arg' declared here 3538 | constexpr virtual void _M_format_arg(size_t __id) = 0; | ^ ``` This is a serious issue and there is no workaround. This patch allows non-constexpr function to override constexpr virtual function for CUDA and HIP. This should be OK since non-constexpr function without explicit host or device attribute can only be called in host functions. Fixes: SWDEV-507350 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Sema/SemaOverload.cpp | 21 +++++++++++++- clang/test/SemaCUDA/constexpr-virtual-func.cu | 28 +++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 clang/test/SemaCUDA/constexpr-virtual-func.cu diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c50260b548851..5ac886856c539 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1058,6 +1058,7 @@ RISC-V Support CUDA/HIP Language Changes ^^^^^^^^^^^^^^^^^^^^^^^^^ +- Fixed a bug about overriding a constexpr pure-virtual member function with a non-constexpr virtual member function which causes compilation failure when including standard C++ header `format`. CUDA Support ^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 7589701fb81de..3be9ade80f1d9 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1309,6 +1309,13 @@ Sema::CheckOverload(Scope *S, FunctionDecl *New, const LookupResult &Old, return Ovl_Overload; } +template static bool hasExplicitAttr(const FunctionDecl *D) { + assert(D && "function decl should not be null"); + if (auto *A = D->getAttr()) + return !A->isImplicit(); + return false; +} + static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, FunctionDecl *Old, bool UseMemberUsingDeclRules, @@ -1583,6 +1590,7 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, return true; } + // At this point, it is known that the two functions have the same signature. if (SemaRef.getLangOpts().CUDA && ConsiderCudaAttrs) { // Don't allow overloading of destructors. (In theory we could, but it // would be a giant change to clang.) @@ -1595,8 +1603,19 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, // Allow overloading of functions with same signature and different CUDA // target attributes. - if (NewTarget != OldTarget) + if (NewTarget != OldTarget) { + // Special case: non-constexpr function is allowed to override + // constexpr virtual function + if (OldMethod && NewMethod && OldMethod->isVirtual() && + OldMethod->isConstexpr() && !NewMethod->isConstexpr() && + !hasExplicitAttr(Old) && + !hasExplicitAttr(Old) && + !hasExplicitAttr(New) && + !hasExplicitAttr(New)) { + return false; + } return true; + } } } } diff --git a/clang/test/SemaCUDA/constexpr-virtual-func.cu b/clang/test/SemaCUDA/constexpr-virtual-func.cu new file mode 100644 index 0000000000000..89d909181cd94 --- /dev/null +++ b/clang/test/SemaCUDA/constexpr-virtual-func.cu @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only \ +// RUN: -fcuda-is-device %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only %s + +// expected-no-diagnostics + +#include "Inputs/cuda.h" + +class A +{ +public: + constexpr virtual int f() = 0; +}; + +class B : public A +{ +public: + int f() override + { + return 42; + } +}; + +int test() +{ + B b; + return b.f(); +} From 9ec92873ecc1c268a1d05e36b7b52e378860ea5b Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 9 Jan 2025 14:19:03 -0500 Subject: [PATCH 38/79] Revert "[HLSL] Move length support out of the DirectX Backend (#121611)" This reverts commit a6b7181733c83523a39d4f4e788c6b7a227d477d. Breaks Clang :: CodeGenHLSL/builtins/length.hlsl, see https://github.com/llvm/llvm-project/pull/121611#issuecomment-2581004278 --- clang/include/clang/Basic/Builtins.td | 6 + clang/lib/CodeGen/CGBuiltin.cpp | 14 ++ clang/lib/CodeGen/CGHLSLRuntime.h | 1 + clang/lib/Headers/hlsl/hlsl_detail.h | 23 -- clang/lib/Headers/hlsl/hlsl_intrinsics.h | 29 ++- clang/lib/Sema/SemaHLSL.cpp | 18 ++ clang/test/CodeGenHLSL/builtins/length.hlsl | 203 +++++++----------- .../test/SemaHLSL/BuiltIns/length-errors.hlsl | 51 ++--- llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 + .../Target/DirectX/DXILIntrinsicExpansion.cpp | 30 +++ llvm/test/CodeGen/DirectX/length.ll | 116 ++++++++++ llvm/test/CodeGen/DirectX/length_error.ll | 10 + .../DirectX/length_invalid_intrinsic_error.ll | 10 + .../length_invalid_intrinsic_error_scalar.ll | 10 + 14 files changed, 324 insertions(+), 198 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/length.ll create mode 100644 llvm/test/CodeGen/DirectX/length_error.ll create mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll create mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index f4216bd01a074..468c16050e2bf 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4865,6 +4865,12 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLLength : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_length"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLLerp : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_lerp"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2b09197669996..ca03fb665d423 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19334,6 +19334,20 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(), ArrayRef{X, Y, S}, nullptr, "hlsl.lerp"); } + case Builtin::BI__builtin_hlsl_length: { + Value *X = EmitScalarExpr(E->getArg(0)); + + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + "length operand must have a float representation"); + // if the operand is a scalar, we can use the fabs llvm intrinsic directly + if (!E->getArg(0)->getType()->isVectorType()) + return EmitFAbs(*this, X); + + return Builder.CreateIntrinsic( + /*ReturnType=*/X->getType()->getScalarType(), + CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef{X}, + nullptr, "hlsl.length"); + } case Builtin::BI__builtin_hlsl_normalize: { Value *X = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 00e110e8e6fa2..46e472f0aae21 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -77,6 +77,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross) GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees) GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac) + GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length) GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp) GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize) GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt) diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index 392075d276b18..8d5fd94133153 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -13,14 +13,6 @@ namespace hlsl { namespace __detail { -template struct is_same { - static const bool value = false; -}; - -template struct is_same { - static const bool value = true; -}; - template struct enable_if {}; template struct enable_if { @@ -41,21 +33,6 @@ constexpr enable_if_t bit_cast(T F) { return __builtin_bit_cast(U, F); } -template -constexpr enable_if_t::value || is_same::value, T> -length_impl(T X) { - return __builtin_elementwise_abs(X); -} - -template -enable_if_t::value || is_same::value, T> -length_vec_impl(vector X) { - vector XSquared = X * X; - T XSquaredSum = XSquared[0]; - [unroll] for (int i = 1; i < N; ++i) XSquaredSum += XSquared[i]; - return __builtin_elementwise_sqrt(XSquaredSum); -} - } // namespace __detail } // namespace hlsl #endif //_HLSL_HLSL_DETAILS_H_ diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index cf287e598f76b..b745997f1d5a2 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -1297,16 +1297,27 @@ float4 lerp(float4, float4, float4); /// /// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...). -const inline half length(half X) { return __detail::length_impl(X); } -const inline float length(float X) { return __detail::length_impl(X); } - -template const inline half length(vector X) { - return __detail::length_vec_impl(X); -} +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) +half length(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) +half length(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) +half length(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) +half length(half4); -template const inline float length(vector X) { - return __detail::length_vec_impl(X); -} +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) +float length(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) +float length(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) +float length(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) +float length(float4); //===----------------------------------------------------------------------===// // log builtins diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 65ddee05a2151..83c38fad2df68 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2112,6 +2112,24 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_length: { + if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall)) + return true; + if (SemaRef.checkArgCount(TheCall, 1)) + return true; + + ExprResult A = TheCall->getArg(0); + QualType ArgTyA = A.get()->getType(); + QualType RetTy; + + if (auto *VTy = ArgTyA->getAs()) + RetTy = VTy->getElementType(); + else + RetTy = TheCall->getArg(0)->getType(); + + TheCall->setType(RetTy); + break; + } case Builtin::BI__builtin_hlsl_mad: { if (SemaRef.checkArgCount(TheCall, 3)) return true; diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl index ecf2edcf0b05f..a24f01d275440 100644 --- a/clang/test/CodeGenHLSL/builtins/length.hlsl +++ b/clang/test/CodeGenHLSL/builtins/length.hlsl @@ -1,130 +1,73 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -finclude-default-header -triple \ -// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -O1 -o - | FileCheck %s - -// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z16test_length_halfDh( -// CHECK-SAME: half noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.fabs.f16(half [[P0]]) -// CHECK-NEXT: ret half [[ELT_ABS_I]] -// -half test_length_half(half p0) -{ - return length(p0); -} - -// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh( -// CHECK-SAME: <2 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x half> [[P0]], [[P0]] -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0 -// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1 -// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT_I]], [[VECEXT1_I]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I]]) -// CHECK-NEXT: ret half [[TMP0]] -// -half test_length_half2(half2 p0) -{ - return length(p0); -} - -// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh( -// CHECK-SAME: <3 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x half> [[P0]], [[P0]] -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0 -// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1 -// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]] -// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2 -// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_1]]) -// CHECK-NEXT: ret half [[TMP0]] -// -half test_length_half3(half3 p0) -{ - return length(p0); -} - -// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh( -// CHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x half> [[P0]], [[P0]] -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0 -// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1 -// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]] -// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2 -// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]] -// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3 -// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_2]], [[ADD_I_1]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_2]]) -// CHECK-NEXT: ret half [[TMP0]] -// -half test_length_half4(half4 p0) -{ - return length(p0); -} - - -// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z17test_length_floatf( -// CHECK-SAME: float noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.fabs.f32(float [[P0]]) -// CHECK-NEXT: ret float [[ELT_ABS_I]] -// -float test_length_float(float p0) -{ - return length(p0); -} - -// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f( -// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x float> [[P0]], [[P0]] -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0 -// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1 -// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT_I]], [[VECEXT1_I]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I]]) -// CHECK-NEXT: ret float [[TMP0]] -// -float test_length_float2(float2 p0) -{ - return length(p0); -} - -// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f( -// CHECK-SAME: <3 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x float> [[P0]], [[P0]] -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0 -// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1 -// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]] -// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2 -// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_1]]) -// CHECK-NEXT: ret float [[TMP0]] -// -float test_length_float3(float3 p0) -{ - return length(p0); -} - - -// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f( -// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x float> [[P0]], [[P0]] -// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0 -// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1 -// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]] -// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2 -// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]] -// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3 -// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_2]], [[ADD_I_1]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_2]]) -// CHECK-NEXT: ret float [[TMP0]] -// -float test_length_float4(float4 p0) -{ - return length(p0); -} +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF + +// NATIVE_HALF: define noundef nofpclass(nan inf) half @ +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.fabs.f16(half +// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float +// NATIVE_HALF: ret half +// NO_HALF: ret float +half test_length_half(half p0) +{ + return length(p0); +} +// NATIVE_HALF: define noundef nofpclass(nan inf) half @ +// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v2f16 +// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32( +// NATIVE_HALF: ret half %hlsl.length +// NO_HALF: ret float %hlsl.length +half test_length_half2(half2 p0) +{ + return length(p0); +} +// NATIVE_HALF: define noundef nofpclass(nan inf) half @ +// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v3f16 +// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32( +// NATIVE_HALF: ret half %hlsl.length +// NO_HALF: ret float %hlsl.length +half test_length_half3(half3 p0) +{ + return length(p0); +} +// NATIVE_HALF: define noundef nofpclass(nan inf) half @ +// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v4f16 +// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32( +// NATIVE_HALF: ret half %hlsl.length +// NO_HALF: ret float %hlsl.length +half test_length_half4(half4 p0) +{ + return length(p0); +} + +// CHECK: define noundef nofpclass(nan inf) float @ +// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float +// CHECK: ret float +float test_length_float(float p0) +{ + return length(p0); +} +// CHECK: define noundef nofpclass(nan inf) float @ +// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32( +// CHECK: ret float %hlsl.length +float test_length_float2(float2 p0) +{ + return length(p0); +} +// CHECK: define noundef nofpclass(nan inf) float @ +// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32( +// CHECK: ret float %hlsl.length +float test_length_float3(float3 p0) +{ + return length(p0); +} +// CHECK: define noundef nofpclass(nan inf) float @ +// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32( +// CHECK: ret float %hlsl.length +float test_length_float4(float4 p0) +{ + return length(p0); +} diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl index a191f0419fbba..281faada6f5e9 100644 --- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl @@ -1,53 +1,32 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected + void test_too_few_arg() { - return length(); - // expected-error@-1 {{no matching function for call to 'length'}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}} + return __builtin_hlsl_length(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} } void test_too_many_arg(float2 p0) { - return length(p0, p0); - // expected-error@-1 {{no matching function for call to 'length'}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}} -} - -float double_to_float_type(double p0) { - return length(p0); - // expected-error@-1 {{call to 'length' is ambiguous}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + return __builtin_hlsl_length(p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} } - -float bool_to_float_type_promotion(bool p1) +bool builtin_bool_to_float_type_promotion(bool p1) { - return length(p1); - // expected-error@-1 {{call to 'length' is ambiguous}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + return __builtin_hlsl_length(p1); + // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}} } -float length_int_to_float_promotion(int p1) +bool builtin_length_int_to_float_promotion(int p1) { - return length(p1); - // expected-error@-1 {{call to 'length' is ambiguous}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + return __builtin_hlsl_length(p1); + // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}} } -float2 length_int2_to_float2_promotion(int2 p1) +bool2 builtin_length_int2_to_float2_promotion(int2 p1) { - return length(p1); - // expected-error@-1 {{call to 'length' is ambiguous}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} + return __builtin_hlsl_length(p1); + // expected-error@-1 {{passing 'int2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}} } diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index ef48af5b42dbf..3b1d1a88e01a8 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -93,6 +93,7 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1 def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], [IntrNoMem]>; +def int_dx_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index cf142806bb1df..51dc3025f0c37 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -57,6 +57,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::dx_nclamp: case Intrinsic::dx_degrees: case Intrinsic::dx_lerp: + case Intrinsic::dx_length: case Intrinsic::dx_normalize: case Intrinsic::dx_fdot: case Intrinsic::dx_sdot: @@ -291,6 +292,32 @@ static Value *expandAnyOrAllIntrinsic(CallInst *Orig, return Result; } +static Value *expandLengthIntrinsic(CallInst *Orig) { + Value *X = Orig->getOperand(0); + IRBuilder<> Builder(Orig); + Type *Ty = X->getType(); + Type *EltTy = Ty->getScalarType(); + + // Though dx.length does work on scalar type, we can optimize it to just emit + // fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because + // CGBuiltin.cpp should have emitted a fabs call. + Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0); + auto *XVec = dyn_cast(Ty); + unsigned XVecSize = XVec->getNumElements(); + if (!(Ty->isVectorTy() && XVecSize > 1)) + report_fatal_error(Twine("Invalid input type for length intrinsic"), + /* gen_crash_diag=*/false); + + Value *Sum = Builder.CreateFMul(Elt, Elt); + for (unsigned I = 1; I < XVecSize; I++) { + Elt = Builder.CreateExtractElement(X, I); + Value *Mul = Builder.CreateFMul(Elt, Elt); + Sum = Builder.CreateFAdd(Sum, Mul); + } + return Builder.CreateIntrinsic(EltTy, Intrinsic::sqrt, ArrayRef{Sum}, + nullptr, "elt.sqrt"); +} + static Value *expandLerpIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Value *Y = Orig->getOperand(1); @@ -562,6 +589,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::dx_lerp: Result = expandLerpIntrinsic(Orig); break; + case Intrinsic::dx_length: + Result = expandLengthIntrinsic(Orig); + break; case Intrinsic::dx_normalize: Result = expandNormalizeIntrinsic(Orig); break; diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll new file mode 100644 index 0000000000000..fc5868a7f6e82 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/length.ll @@ -0,0 +1,116 @@ +; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK +; RUN: opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK + +; Make sure dxil operation function calls for length are generated for half/float. + +declare half @llvm.fabs.f16(half) +declare half @llvm.dx.length.v2f16(<2 x half>) +declare half @llvm.dx.length.v3f16(<3 x half>) +declare half @llvm.dx.length.v4f16(<4 x half>) + +declare float @llvm.fabs.f32(float) +declare float @llvm.dx.length.v2f32(<2 x float>) +declare float @llvm.dx.length.v3f32(<3 x float>) +declare float @llvm.dx.length.v4f32(<4 x float>) + +define noundef half @test_length_half2(<2 x half> noundef %p0) { +entry: + ; CHECK: extractelement <2 x half> %{{.*}}, i64 0 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: extractelement <2 x half> %{{.*}}, i64 1 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}}) + ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}}) + + %hlsl.length = call half @llvm.dx.length.v2f16(<2 x half> %p0) + ret half %hlsl.length +} + +define noundef half @test_length_half3(<3 x half> noundef %p0) { +entry: + ; CHECK: extractelement <3 x half> %{{.*}}, i64 0 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: extractelement <3 x half> %{{.*}}, i64 1 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; CHECK: extractelement <3 x half> %{{.*}}, i64 2 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}}) + ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}}) + + %hlsl.length = call half @llvm.dx.length.v3f16(<3 x half> %p0) + ret half %hlsl.length +} + +define noundef half @test_length_half4(<4 x half> noundef %p0) { +entry: + ; CHECK: extractelement <4 x half> %{{.*}}, i64 0 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x half> %{{.*}}, i64 1 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x half> %{{.*}}, i64 2 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x half> %{{.*}}, i64 3 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}}) + ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}}) + + %hlsl.length = call half @llvm.dx.length.v4f16(<4 x half> %p0) + ret half %hlsl.length +} + +define noundef float @test_length_float2(<2 x float> noundef %p0) { +entry: + ; CHECK: extractelement <2 x float> %{{.*}}, i64 0 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: extractelement <2 x float> %{{.*}}, i64 1 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) + ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) + + %hlsl.length = call float @llvm.dx.length.v2f32(<2 x float> %p0) + ret float %hlsl.length +} + +define noundef float @test_length_float3(<3 x float> noundef %p0) { +entry: + ; CHECK: extractelement <3 x float> %{{.*}}, i64 0 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: extractelement <3 x float> %{{.*}}, i64 1 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; CHECK: extractelement <3 x float> %{{.*}}, i64 2 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) + ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) + + %hlsl.length = call float @llvm.dx.length.v3f32(<3 x float> %p0) + ret float %hlsl.length +} + +define noundef float @test_length_float4(<4 x float> noundef %p0) { +entry: + ; CHECK: extractelement <4 x float> %{{.*}}, i64 0 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x float> %{{.*}}, i64 1 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x float> %{{.*}}, i64 2 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x float> %{{.*}}, i64 3 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) + ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) + + %hlsl.length = call float @llvm.dx.length.v4f32(<4 x float> %p0) + ret float %hlsl.length +} diff --git a/llvm/test/CodeGen/DirectX/length_error.ll b/llvm/test/CodeGen/DirectX/length_error.ll new file mode 100644 index 0000000000000..143b41fc506e1 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/length_error.ll @@ -0,0 +1,10 @@ +; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation length does not support double overload type +; CHECK: Cannot create Sqrt operation: Invalid overload type + +define noundef double @test_length_double2(<2 x double> noundef %p0) { +entry: + %hlsl.length = call double @llvm.dx.length.v2f32(<2 x double> %p0) + ret double %hlsl.length +} diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll new file mode 100644 index 0000000000000..f722de2f9029e --- /dev/null +++ b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll @@ -0,0 +1,10 @@ +; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation length does not support 1-element vector types. +; CHECK: LLVM ERROR: Invalid input type for length intrinsic + +define noundef float @test_length_float(<1 x float> noundef %p0) { +entry: + %hlsl.length = call float @llvm.dx.length.v1f32(<1 x float> %p0) + ret float %hlsl.length +} diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll new file mode 100644 index 0000000000000..ac3a0513eb6b2 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll @@ -0,0 +1,10 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation length does not support scalar types +; CHECK: error: invalid intrinsic signature + +define noundef float @test_length_float(float noundef %p0) { +entry: + %hlsl.length = call float @llvm.dx.length.f32(float %p0) + ret float %hlsl.length +} From 26aa20a3dd82e2ff5855bee04b22b35f6b1f026f Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 9 Jan 2025 11:21:03 -0800 Subject: [PATCH 39/79] Use range-based for to iterate over fields in record layout, NFCI (#122029) Move the common case of FieldDecl::getFieldIndex() inline to mitigate the cost of removing the extra `FieldNo` induction variable. Also rename isNoUniqueAddress parameter to isNonVirtualBaseType, which appears to be more accurate. I think the current name is just a consequence of autocomplete gone wrong. --- clang/include/clang/AST/Decl.h | 14 +++- clang/lib/AST/Decl.cpp | 10 +-- clang/lib/AST/RecordLayoutBuilder.cpp | 81 +++++++++------------ clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 6 +- 4 files changed, 52 insertions(+), 59 deletions(-) diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 67ee0bb412692..16fc98aa1a57f 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -3115,8 +3115,20 @@ class FieldDecl : public DeclaratorDecl, public Mergeable { /// Returns the index of this field within its record, /// as appropriate for passing to ASTRecordLayout::getFieldOffset. - unsigned getFieldIndex() const; + unsigned getFieldIndex() const { + const FieldDecl *Canonical = getCanonicalDecl(); + if (Canonical->CachedFieldIndex == 0) { + Canonical->setCachedFieldIndex(); + assert(Canonical->CachedFieldIndex != 0); + } + return Canonical->CachedFieldIndex - 1; + } +private: + /// Set CachedFieldIndex to the index of this field plus one. + void setCachedFieldIndex() const; + +public: /// Determines whether this field is mutable (C++ only). bool isMutable() const { return Mutable; } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 741e908cf9bc5..97e23dd1aaa92 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -4651,12 +4651,9 @@ bool FieldDecl::isPotentiallyOverlapping() const { return hasAttr() && getType()->getAsCXXRecordDecl(); } -unsigned FieldDecl::getFieldIndex() const { - const FieldDecl *Canonical = getCanonicalDecl(); - if (Canonical != this) - return Canonical->getFieldIndex(); - - if (CachedFieldIndex) return CachedFieldIndex - 1; +void FieldDecl::setCachedFieldIndex() const { + assert(this == getCanonicalDecl() && + "should be called on the canonical decl"); unsigned Index = 0; const RecordDecl *RD = getParent()->getDefinition(); @@ -4670,7 +4667,6 @@ unsigned FieldDecl::getFieldIndex() const { } assert(CachedFieldIndex && "failed to find field in parent"); - return CachedFieldIndex - 1; } SourceRange FieldDecl::getSourceRange() const { diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp index f749d3a705fc9..4493dd00d26d8 100644 --- a/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/clang/lib/AST/RecordLayoutBuilder.cpp @@ -138,9 +138,9 @@ class EmptySubobjectMap { return Offset <= MaxEmptyClassOffset; } - CharUnits - getFieldOffset(const ASTRecordLayout &Layout, unsigned FieldNo) const { - uint64_t FieldOffset = Layout.getFieldOffset(FieldNo); + CharUnits getFieldOffset(const ASTRecordLayout &Layout, + const FieldDecl *Field) const { + uint64_t FieldOffset = Layout.getFieldOffset(Field->getFieldIndex()); assert(FieldOffset % CharWidth == 0 && "Field offset not at char boundary!"); @@ -298,14 +298,12 @@ EmptySubobjectMap::CanPlaceBaseSubobjectAtOffset(const BaseSubobjectInfo *Info, } // Traverse all member variables. - unsigned FieldNo = 0; - for (CXXRecordDecl::field_iterator I = Info->Class->field_begin(), - E = Info->Class->field_end(); I != E; ++I, ++FieldNo) { - if (I->isBitField()) + for (const FieldDecl *Field : Info->Class->fields()) { + if (Field->isBitField()) continue; - CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); - if (!CanPlaceFieldSubobjectAtOffset(*I, FieldOffset)) + CharUnits FieldOffset = Offset + getFieldOffset(Layout, Field); + if (!CanPlaceFieldSubobjectAtOffset(Field, FieldOffset)) return false; } @@ -345,14 +343,12 @@ void EmptySubobjectMap::UpdateEmptyBaseSubobjects(const BaseSubobjectInfo *Info, } // Traverse all member variables. - unsigned FieldNo = 0; - for (CXXRecordDecl::field_iterator I = Info->Class->field_begin(), - E = Info->Class->field_end(); I != E; ++I, ++FieldNo) { - if (I->isBitField()) + for (const FieldDecl *Field : Info->Class->fields()) { + if (Field->isBitField()) continue; - CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); - UpdateEmptyFieldSubobjects(*I, FieldOffset, PlacingEmptyBase); + CharUnits FieldOffset = Offset + getFieldOffset(Layout, Field); + UpdateEmptyFieldSubobjects(Field, FieldOffset, PlacingEmptyBase); } } @@ -410,15 +406,12 @@ EmptySubobjectMap::CanPlaceFieldSubobjectAtOffset(const CXXRecordDecl *RD, } // Traverse all member variables. - unsigned FieldNo = 0; - for (CXXRecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); - I != E; ++I, ++FieldNo) { - if (I->isBitField()) + for (const FieldDecl *Field : RD->fields()) { + if (Field->isBitField()) continue; - CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); - - if (!CanPlaceFieldSubobjectAtOffset(*I, FieldOffset)) + CharUnits FieldOffset = Offset + getFieldOffset(Layout, Field); + if (!CanPlaceFieldSubobjectAtOffset(Field, FieldOffset)) return false; } @@ -465,9 +458,8 @@ EmptySubobjectMap::CanPlaceFieldSubobjectAtOffset(const FieldDecl *FD, return true; } -bool -EmptySubobjectMap::CanPlaceFieldAtOffset(const FieldDecl *FD, - CharUnits Offset) { +bool EmptySubobjectMap::CanPlaceFieldAtOffset(const FieldDecl *FD, + CharUnits Offset) { if (!CanPlaceFieldSubobjectAtOffset(FD, Offset)) return false; @@ -521,15 +513,12 @@ void EmptySubobjectMap::UpdateEmptyFieldSubobjects( } // Traverse all member variables. - unsigned FieldNo = 0; - for (CXXRecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); - I != E; ++I, ++FieldNo) { - if (I->isBitField()) + for (const FieldDecl *Field : RD->fields()) { + if (Field->isBitField()) continue; - CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); - - UpdateEmptyFieldSubobjects(*I, FieldOffset, PlacingOverlappingField); + CharUnits FieldOffset = Offset + getFieldOffset(Layout, Field); + UpdateEmptyFieldSubobjects(Field, FieldOffset, PlacingOverlappingField); } } @@ -1455,10 +1444,8 @@ void ItaniumRecordLayoutBuilder::LayoutFields(const RecordDecl *D) { bool InsertExtraPadding = D->mayInsertExtraPadding(/*EmitRemark=*/true); bool HasFlexibleArrayMember = D->hasFlexibleArrayMember(); for (auto I = D->field_begin(), End = D->field_end(); I != End; ++I) { - auto Next(I); - ++Next; - LayoutField(*I, - InsertExtraPadding && (Next != End || !HasFlexibleArrayMember)); + LayoutField(*I, InsertExtraPadding && + (std::next(I) != End || !HasFlexibleArrayMember)); } } @@ -3672,35 +3659,33 @@ static void DumpRecordLayout(raw_ostream &OS, const RecordDecl *RD, } // Dump fields. - uint64_t FieldNo = 0; - for (RecordDecl::field_iterator I = RD->field_begin(), - E = RD->field_end(); I != E; ++I, ++FieldNo) { - const FieldDecl &Field = **I; - uint64_t LocalFieldOffsetInBits = Layout.getFieldOffset(FieldNo); + for (const FieldDecl *Field : RD->fields()) { + uint64_t LocalFieldOffsetInBits = + Layout.getFieldOffset(Field->getFieldIndex()); CharUnits FieldOffset = Offset + C.toCharUnitsFromBits(LocalFieldOffsetInBits); // Recursively dump fields of record type. - if (auto RT = Field.getType()->getAs()) { + if (auto RT = Field->getType()->getAs()) { DumpRecordLayout(OS, RT->getDecl(), C, FieldOffset, IndentLevel, - Field.getName().data(), + Field->getName().data(), /*PrintSizeInfo=*/false, /*IncludeVirtualBases=*/true); continue; } - if (Field.isBitField()) { + if (Field->isBitField()) { uint64_t LocalFieldByteOffsetInBits = C.toBits(FieldOffset - Offset); unsigned Begin = LocalFieldOffsetInBits - LocalFieldByteOffsetInBits; - unsigned Width = Field.getBitWidthValue(C); + unsigned Width = Field->getBitWidthValue(C); PrintBitFieldOffset(OS, FieldOffset, Begin, Width, IndentLevel); } else { PrintOffset(OS, FieldOffset, IndentLevel); } const QualType &FieldType = C.getLangOpts().DumpRecordLayoutsCanonical - ? Field.getType().getCanonicalType() - : Field.getType(); - OS << FieldType << ' ' << Field << '\n'; + ? Field->getType().getCanonicalType() + : Field->getType(); + OS << FieldType << ' ' << *Field << '\n'; } // Dump virtual bases. diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index ea44e6f21f3c8..209ef236f0d5d 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -182,7 +182,7 @@ struct CGRecordLowering { llvm::Type *StorageType); /// Lowers an ASTRecordLayout to a llvm type. void lower(bool NonVirtualBaseType); - void lowerUnion(bool isNoUniqueAddress); + void lowerUnion(bool isNonVirtualBaseType); void accumulateFields(bool isNonVirtualBaseType); RecordDecl::field_iterator accumulateBitFields(bool isNonVirtualBaseType, @@ -310,9 +310,9 @@ void CGRecordLowering::lower(bool NVBaseType) { computeVolatileBitfields(); } -void CGRecordLowering::lowerUnion(bool isNoUniqueAddress) { +void CGRecordLowering::lowerUnion(bool isNonVirtualBaseType) { CharUnits LayoutSize = - isNoUniqueAddress ? Layout.getDataSize() : Layout.getSize(); + isNonVirtualBaseType ? Layout.getDataSize() : Layout.getSize(); llvm::Type *StorageType = nullptr; bool SeenNamedMember = false; // Iterate through the fields setting bitFieldInfo and the Fields array. Also From 5ff36748cfeee1d02da6512ad578e4014724f67e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 9 Jan 2025 10:39:38 -0800 Subject: [PATCH 40/79] [SLP]Fix mask processing for reused gathered scalars Need to sync the mask between cost and actual emission to avoid bugs in mask calculation Fixes #122324 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 14 +++++- .../X86/shuffle-mask-emission.ll | 43 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 36fed8937aec2..48ed612c11b36 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10973,7 +10973,19 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { } } - ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true); + if (!ExtMask.empty()) { + if (CommonMask.empty()) { + CommonMask.assign(ExtMask.begin(), ExtMask.end()); + } else { + SmallVector NewMask(ExtMask.size(), PoisonMaskElem); + for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) { + if (ExtMask[I] == PoisonMaskElem) + continue; + NewMask[I] = CommonMask[ExtMask[I]]; + } + CommonMask.swap(NewMask); + } + } if (CommonMask.empty()) { assert(InVectors.size() == 1 && "Expected only one vector with no mask"); return Cost; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll new file mode 100644 index 0000000000000..fcc295de62adf --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i1 @test() { +; CHECK-LABEL: define i1 @test() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[H_PROMOTED118_I_FR:%.*]] = freeze i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[H_PROMOTED118_I_FR]], i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> zeroinitializer, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP8]], false +; CHECK-NEXT: ret i1 [[OP_RDX]] +; +entry: + %h.promoted118.i.fr = freeze i32 1 + %invariant.op.i51 = add i32 %h.promoted118.i.fr, 0 + %conv25.i = xor i32 0, 0 + %add.i.i = add i32 %conv25.i, %h.promoted118.i.fr + %sext.i.mask = and i32 %add.i.i, 0 + %cmp27.i = icmp eq i32 %sext.i.mask, 1 + %0 = or i1 %cmp27.i, false + %conv25.i.1 = add i32 0, 0 + %add.i.i.1 = add i32 %conv25.i.1, %h.promoted118.i.fr + %sext.i.1.mask = and i32 %add.i.i.1, 1 + %cmp27.i.1 = icmp eq i32 %sext.i.1.mask, 0 + %conv25.1.i.1 = xor i32 0, 0 + %add.i.1.i.1 = add i32 %conv25.1.i.1, %h.promoted118.i.fr + %sext.1.i.1.mask = and i32 %add.i.1.i.1, 1 + %cmp27.1.i.1 = icmp eq i32 %sext.1.i.1.mask, 0 + %add.i.2.reass.i.1 = add i32 %invariant.op.i51, %conv25.i.1 + %sext.2.i.1.mask = and i32 %add.i.2.reass.i.1, 1 + %cmp27.2.i.1 = icmp eq i32 %sext.2.i.1.mask, 0 + %1 = or i1 %cmp27.1.i.1, %cmp27.2.i.1 + %2 = or i1 %cmp27.i.1, %1 + %3 = or i1 %0, %2 + ret i1 %3 +} From 1842a3d833d934793012c717e98b10d51193fd0d Mon Sep 17 00:00:00 2001 From: Paul Bowen-Huggett Date: Thu, 9 Jan 2025 20:26:07 +0100 Subject: [PATCH 41/79] Fix a cmake error when using the Xcode generator. (#119403) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I’m seeing a series of errors when trying to run the cmake configure step on macOS when the cmake generator is set to Xcode. All is well if I use the Ninja or Unix Makefile generators. Messages are all of the form: ~~~ CMake Error at …llvm-project/clang/cmake/modules/AddClang.cmake:120 (target_compile_definitions): Cannot specify compile definitions for target "obj.clangBasic" which is not built by this project. Call Stack (most recent call first): …llvm-project/clang/lib/Basic/CMakeLists.txt:57 (add_clang_library) ~~~ The remaining errors are similar but mention targets obj.clangAPINotes, obj.clangLex, obj.clangParse, and so on. The regression appears to have been introduced by commit 09fa2f012fcc (Oct 14 2024) which added the code in this area. My proposed solution is simply to add a test to ensure that the obj.x target exists before setting its compile definitions. There is precedent doing just this in both clang/cmake/modules/AddClang.cmake and clang/lib/support/CMakeLists.txt as well as in the “MSVC AND NOT CLANG_LINK_CLANG_DYLIB” path immediately above the offending line. I’ve also made a couple of grammatical tweaks in the comments surrounding this code. In case it's relevant, the cmake settings and definitions I've used to trigger these errors is: ~~~bash GENERATOR="Xcode" OUTDIR=build_macos cmake \ -S "$SCRIPT_DIR/llvm" \ -B "$SCRIPT_DIR/$OUTDIR" \ -G "$GENERATOR" \ -D CMAKE_BUILD_TYPE=Release \ -D CMAKE_OSX_ARCHITECTURES=arm64 \ -D LLVM_PARALLEL_LINK_JOBS=1 \ -D LLVM_ENABLE_PROJECTS="clang;lld" \ -D LLVM_TARGETS_TO_BUILD=RISCV \ -D LLVM_DEFAULT_TARGET_TRIPLE=riscv32-unknown-elf \ -D LLVM_OPTIMIZED_TABLEGEN=Yes ~~~ (cmake v3.31.1, Xcode 16.1. I know that not all of these variables are useful for the Xcode generator!) Co-authored-by: Paul Bowen-Huggett --- clang/cmake/modules/AddClang.cmake | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/cmake/modules/AddClang.cmake b/clang/cmake/modules/AddClang.cmake index 091aec98e93ca..cdc8bd5cd503b 100644 --- a/clang/cmake/modules/AddClang.cmake +++ b/clang/cmake/modules/AddClang.cmake @@ -109,13 +109,14 @@ macro(add_clang_library name) llvm_add_library(${name} ${LIBTYPE} ${ARG_UNPARSED_ARGUMENTS} ${srcs}) if(MSVC AND NOT CLANG_LINK_CLANG_DYLIB) - # Make sure all consumers also turn off visibility macros so there not trying to dllimport symbols. + # Make sure all consumers also turn off visibility macros so they're not + # trying to dllimport symbols. target_compile_definitions(${name} PUBLIC CLANG_BUILD_STATIC) if(TARGET "obj.${name}") target_compile_definitions("obj.${name}" PUBLIC CLANG_BUILD_STATIC) endif() - elseif(NOT ARG_SHARED AND NOT ARG_STATIC) - # Clang component libraries linked in to clang-cpp are declared without SHARED or STATIC + elseif(TARGET "obj.${name}" AND NOT ARG_SHARED AND NOT ARG_STATIC) + # Clang component libraries linked to clang-cpp are declared without SHARED or STATIC target_compile_definitions("obj.${name}" PUBLIC CLANG_EXPORTS) endif() From 2c6ed5f7eec30c22e136c03777eda9a8c4c0c79b Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 9 Jan 2025 14:26:19 -0500 Subject: [PATCH 42/79] [libc++][NFC] Remove trailing whitespace from release notes --- libcxx/docs/ReleaseNotes/20.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 793b172a24af0..9a520d8452b06 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -125,9 +125,9 @@ Deprecations and Removals supported as an extension anymore, please migrate any code that uses e.g. ``std::vector`` to be standards conforming. -- Non-conforming member typedefs ``base``, ``iterator``, ``const_iterator``, and ``const_reference`` of ``std::bitset``, +- Non-conforming member typedefs ``base``, ``iterator``, ``const_iterator``, and ``const_reference`` of ``std::bitset``, and member typedef ``base`` of ``std::forward_list`` and ``std::list`` are removed. Previously, these member typedefs - (except ``const_reference``) were private but could cause ambiguity in name lookup. Code that expects such ambiguity + (except ``const_reference``) were private but could cause ambiguity in name lookup. Code that expects such ambiguity will possibly not compile in LLVM 20. - The function ``__libcpp_verbose_abort()`` is now ``noexcept``, to match ``std::terminate()``. (The combination of From b16777afb0c1799bb3eaa63c66213180c99c9d25 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 9 Jan 2025 11:36:09 -0800 Subject: [PATCH 43/79] [RISCV] Return MILog2SEW for mask instructions getOperandLog2EEW. NFC (#122332) The SEW operand for these instructions should have a value of 0. This matches what was done for vcpop/vfirst. --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 6661921d66f95..89684decde6c1 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -637,7 +637,7 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { case RISCV::VMSBF_M: case RISCV::VMSIF_M: case RISCV::VMSOF_M: { - return 0; + return MILog2SEW; } // Vector Iota Instruction From 876841b0e2aea4ae8dac58842242e311b8aef432 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 9 Jan 2025 11:37:40 -0800 Subject: [PATCH 44/79] [WebAssembly] Format WebAssembly ReleaseNote entries (#122203) --- llvm/docs/ReleaseNotes.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 3463dc8339fd8..4d5f5fb0384b8 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -240,15 +240,15 @@ Changes to the RISC-V Backend Changes to the WebAssembly Backend ---------------------------------- -The default target CPU, "generic", now enables the `-mnontrapping-fptoint` -and `-mbulk-memory` flags, which correspond to the [Bulk Memory Operations] -and [Non-trapping float-to-int Conversions] language features, which are -[widely implemented in engines]. - -A new Lime1 target CPU is added, -mcpu=lime1. This CPU follows the definition of -the Lime1 CPU [here], and enables -mmultivalue, -mmutable-globals, --mcall-indirect-overlong, -msign-ext, -mbulk-memory-opt, -mnontrapping-fptoint, -and -mextended-const. +* The default target CPU, "generic", now enables the `-mnontrapping-fptoint` + and `-mbulk-memory` flags, which correspond to the [Bulk Memory Operations] + and [Non-trapping float-to-int Conversions] language features, which are + [widely implemented in engines]. + +* A new Lime1 target CPU is added, `-mcpu=lime1`. This CPU follows the + definition of the Lime1 CPU [here], and enables `-mmultivalue`, + `-mmutable-globals`, `-mcall-indirect-overlong`, `-msign-ext`, + `-mbulk-memory-opt`, `-mnontrapping-fptoint`, and `-mextended-const`. [Bulk Memory Operations]: https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md [Non-trapping float-to-int Conversions]: https://github.com/WebAssembly/spec/blob/master/proposals/nontrapping-float-to-int-conversion/Overview.md From 1b897f737df2097f8fee1b203676ea7f01dff06d Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Thu, 9 Jan 2025 20:40:31 +0100 Subject: [PATCH 45/79] [clang-tidy] Fix misc-unused-parameters on params with attrs (#122286) Don't suggest to comment-out the parameter name if the parameter has an attribute that's spelled after the parameter name. This prevents the parameter's attributes from being wrongly applied to the parameter's type. This fixes #122191. --- .../clang-tidy/misc/UnusedParametersCheck.cpp | 19 +++++++++++++++++++ .../checkers/misc/unused-parameters.cpp | 16 ++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp index c2d9286312dc4..d792b5c52191f 100644 --- a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp @@ -9,8 +9,11 @@ #include "UnusedParametersCheck.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/STLExtras.h" #include @@ -26,6 +29,17 @@ bool isOverrideMethod(const FunctionDecl *Function) { return MD->size_overridden_methods() > 0 || MD->hasAttr(); return false; } + +bool hasAttrAfterParam(const SourceManager *SourceManager, + const ParmVarDecl *Param) { + for (const auto *Attr : Param->attrs()) { + if (SourceManager->isBeforeInTranslationUnit(Param->getLocation(), + Attr->getLocation())) { + return true; + } + } + return false; +} } // namespace void UnusedParametersCheck::registerMatchers(MatchFinder *Finder) { @@ -189,6 +203,11 @@ void UnusedParametersCheck::check(const MatchFinder::MatchResult &Result) { if (Param->isUsed() || Param->isReferenced() || !Param->getDeclName() || Param->hasAttr()) continue; + if (hasAttrAfterParam(Result.SourceManager, Param)) { + // Due to how grammar works, attributes would be wrongly applied to the + // type if we remove the preceding parameter name. + continue; + } // In non-strict mode ignore function definitions with empty bodies // (constructor initializer counts for non-empty body). diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp index a3fcf30f273ef..524de45463e36 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp @@ -33,6 +33,16 @@ void f(void (*fn)()) {;} // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: parameter 'fn' is unused [misc-unused-parameters] // CHECK-FIXES: {{^}}void f(void (* /*fn*/)()) {;}{{$}} +int *k([[clang::lifetimebound]] int *i) {;} +// CHECK-MESSAGES: :[[@LINE-1]]:38: warning: parameter 'i' is unused [misc-unused-parameters] +// CHECK-FIXES: {{^}}int *k({{\[\[clang::lifetimebound\]\]}} int * /*i*/) {;}{{$}} + +#define ATTR_BEFORE(x) [[clang::lifetimebound]] x +int* m(ATTR_BEFORE(const int *i)) { return nullptr; } +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: parameter 'i' is unused [misc-unused-parameters] +// CHECK-FIXES: {{^}}int* m(ATTR_BEFORE(const int * /*i*/)) { return nullptr; }{{$}} +#undef ATTR_BEFORE + // Unchanged cases // =============== void f(int i); // Don't remove stuff in declarations @@ -42,6 +52,12 @@ void s(int i[1]); void u(void (*fn)()); void w(int i) { (void)i; } // Don't remove used parameters +// Don't reanchor the attribute to the type: +int *x(int *i [[clang::lifetimebound]]) { return nullptr; } +#define ATTR_AFTER(x) x [[clang::lifetimebound]] +int* y(ATTR_AFTER(const int *i)) { return nullptr; } +#undef ATTR_AFTER + bool useLambda(int (*fn)(int)); static bool static_var = useLambda([] (int a) { return a; }); From 0aa831e0edb1c1deabb96ce2435667cc82bac79b Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Thu, 9 Jan 2025 11:42:22 -0800 Subject: [PATCH 46/79] [mlir][GPU] Implement ValueBoundsOpInterface for GPU ID operations (#122190) The GPU ID operations already implement InferIntRangeInterface, which gives constant lower and upper bounds on those IDs when appropriate metadata is prentent on the operations or in the surrounding context. This commit uses that existing code to implement the ValueBoundsOpInterface, which is used when analyzing affine operations (unlike the integer range interface, which is used for arithmetic optimization). It also implements the interface for gpu.launch, where we can use it to express the constraint that block/grid sizes are equal to their value from outside the launch op and that the corresponding IDs are bounded above by that size. As a consequence, the test pass for this inference is updated to work on a FunctionOpInterface and not a func.func, creating minor churn in other tests. --- .../GPU/IR/ValueBoundsOpInterfaceImpl.h | 19 +++ mlir/include/mlir/InitAllDialects.h | 2 + mlir/lib/Dialect/GPU/CMakeLists.txt | 3 +- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 5 + .../GPU/IR/ValueBoundsOpInterfaceImpl.cpp | 114 +++++++++++++ .../value-bounds-op-interface-impl.mlir | 2 +- .../Affine/value-bounds-reification.mlir | 4 +- .../Arith/value-bounds-op-interface-impl.mlir | 4 +- .../GPU/value-bounds-op-interface-impl.mlir | 159 ++++++++++++++++++ .../value-bounds-op-interface-impl.mlir | 2 +- .../value-bounds-op-interface-impl.mlir | 2 +- .../SCF/value-bounds-op-interface-impl.mlir | 2 +- .../value-bounds-op-interface-impl.mlir | 2 +- .../Dialect/Vector/test-scalable-bounds.mlir | 2 +- .../value-bounds-op-interface-impl.mlir | 2 +- .../Dialect/Affine/TestReifyValueBounds.cpp | 8 +- 16 files changed, 317 insertions(+), 15 deletions(-) create mode 100644 mlir/include/mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h create mode 100644 mlir/lib/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.cpp create mode 100644 mlir/test/Dialect/GPU/value-bounds-op-interface-impl.mlir diff --git a/mlir/include/mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h b/mlir/include/mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h new file mode 100644 index 0000000000000..9a4e159ef76c8 --- /dev/null +++ b/mlir/include/mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h @@ -0,0 +1,19 @@ +//===- ValueBoundsOpInterfaceImpl.h - Impl. of ValueBoundsOpInterface -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H +#define MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H + +namespace mlir { +class DialectRegistry; + +namespace gpu { +void registerValueBoundsOpInterfaceExternalModels(DialectRegistry ®istry); +} // namespace gpu +} // namespace mlir +#endif // MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index 7fd0432ddce1b..c102f811cce4b 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -37,6 +37,7 @@ #include "mlir/Dialect/EmitC/IR/EmitC.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h" #include "mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h" #include "mlir/Dialect/IRDL/IR/IRDL.h" #include "mlir/Dialect/Index/IR/IndexDialect.h" @@ -164,6 +165,7 @@ inline void registerAllDialects(DialectRegistry ®istry) { cf::registerBufferizableOpInterfaceExternalModels(registry); cf::registerBufferDeallocationOpInterfaceExternalModels(registry); gpu::registerBufferDeallocationOpInterfaceExternalModels(registry); + gpu::registerValueBoundsOpInterfaceExternalModels(registry); LLVM::registerInlinerInterface(registry); linalg::registerAllDialectInterfaceImplementations(registry); linalg::registerRuntimeVerifiableOpInterfaceExternalModels(registry); diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 1026e9b509332..013311ec027da 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -1,6 +1,7 @@ add_mlir_dialect_library(MLIRGPUDialect IR/GPUDialect.cpp IR/InferIntRangeInterfaceImpls.cpp + IR/ValueBoundsOpInterfaceImpl.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU @@ -40,7 +41,7 @@ add_mlir_dialect_library(MLIRGPUTransforms Transforms/ShuffleRewriter.cpp Transforms/SPIRVAttachTarget.cpp Transforms/SubgroupReduceLowering.cpp - + OBJECT ADDITIONAL_HEADER_DIRS diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 8e36638d6e545..49209229259a7 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -29,6 +29,7 @@ #include "mlir/IR/TypeUtilities.h" #include "mlir/Interfaces/FunctionImplementation.h" #include "mlir/Interfaces/SideEffectInterfaces.h" +#include "mlir/Interfaces/ValueBoundsOpInterface.h" #include "mlir/Transforms/InliningUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/TypeSwitch.h" @@ -217,6 +218,10 @@ void GPUDialect::initialize() { addInterfaces(); declarePromisedInterface(); + declarePromisedInterfaces< + ValueBoundsOpInterface, ClusterDimOp, ClusterDimBlocksOp, ClusterIdOp, + ClusterBlockIdOp, BlockDimOp, BlockIdOp, GridDimOp, ThreadIdOp, LaneIdOp, + SubgroupIdOp, GlobalIdOp, NumSubgroupsOp, SubgroupSizeOp, LaunchOp>(); } static std::string getSparseHandleKeyword(SparseHandleKind kind) { diff --git a/mlir/lib/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.cpp new file mode 100644 index 0000000000000..3bb7082daa5a0 --- /dev/null +++ b/mlir/lib/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.cpp @@ -0,0 +1,114 @@ +//===- ValueBoundsOpInterfaceImpl.cpp - Impl. of ValueBoundsOpInterface ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h" + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Interfaces/InferIntRangeInterface.h" +#include "mlir/Interfaces/ValueBoundsOpInterface.h" + +using namespace mlir; +using namespace mlir::gpu; + +namespace { +/// Implement ValueBoundsOpInterface (which only works on index-typed values, +/// gathers a set of constraint expressions, and is used for affine analyses) +/// in terms of InferIntRangeInterface (which works +/// on arbitrary integer types, creates [min, max] ranges, and is used in for +/// arithmetic simplification). +template +struct GpuIdOpInterface + : public ValueBoundsOpInterface::ExternalModel, Op> { + void populateBoundsForIndexValue(Operation *op, Value value, + ValueBoundsConstraintSet &cstr) const { + auto inferrable = cast(op); + assert(value == op->getResult(0) && + "inferring for value that isn't the GPU op's result"); + auto translateConstraint = [&](Value v, const ConstantIntRanges &range) { + assert(v == value && + "GPU ID op inferring values for something that's not its result"); + cstr.bound(v) >= range.smin().getSExtValue(); + cstr.bound(v) <= range.smax().getSExtValue(); + }; + assert(inferrable->getNumOperands() == 0 && "ID ops have no operands"); + inferrable.inferResultRanges({}, translateConstraint); + } +}; + +struct GpuLaunchOpInterface + : public ValueBoundsOpInterface::ExternalModel { + void populateBoundsForIndexValue(Operation *op, Value value, + ValueBoundsConstraintSet &cstr) const { + auto launchOp = cast(op); + + Value sizeArg = nullptr; + bool isSize = false; + KernelDim3 gridSizeArgs = launchOp.getGridSizeOperandValues(); + KernelDim3 blockSizeArgs = launchOp.getBlockSizeOperandValues(); + + auto match = [&](KernelDim3 bodyArgs, KernelDim3 externalArgs, + bool areSizeArgs) { + if (value == bodyArgs.x) { + sizeArg = externalArgs.x; + isSize = areSizeArgs; + } + if (value == bodyArgs.y) { + sizeArg = externalArgs.y; + isSize = areSizeArgs; + } + if (value == bodyArgs.z) { + sizeArg = externalArgs.z; + isSize = areSizeArgs; + } + }; + match(launchOp.getThreadIds(), blockSizeArgs, false); + match(launchOp.getBlockSize(), blockSizeArgs, true); + match(launchOp.getBlockIds(), gridSizeArgs, false); + match(launchOp.getGridSize(), gridSizeArgs, true); + if (launchOp.hasClusterSize()) { + KernelDim3 clusterSizeArgs = *launchOp.getClusterSizeOperandValues(); + match(*launchOp.getClusterIds(), clusterSizeArgs, false); + match(*launchOp.getClusterSize(), clusterSizeArgs, true); + } + + if (!sizeArg) + return; + if (isSize) { + cstr.bound(value) == cstr.getExpr(sizeArg); + cstr.bound(value) >= 1; + } else { + cstr.bound(value) < cstr.getExpr(sizeArg); + cstr.bound(value) >= 0; + } + } +}; +} // namespace + +void mlir::gpu::registerValueBoundsOpInterfaceExternalModels( + DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, GPUDialect *dialect) { +#define REGISTER(X) X::attachInterface>(*ctx); + REGISTER(ClusterDimOp) + REGISTER(ClusterDimBlocksOp) + REGISTER(ClusterIdOp) + REGISTER(ClusterBlockIdOp) + REGISTER(BlockDimOp) + REGISTER(BlockIdOp) + REGISTER(GridDimOp) + REGISTER(ThreadIdOp) + REGISTER(LaneIdOp) + REGISTER(SubgroupIdOp) + REGISTER(GlobalIdOp) + REGISTER(NumSubgroupsOp) + REGISTER(SubgroupSizeOp) +#undef REGISTER + + LaunchOp::attachInterface(*ctx); + }); +} diff --git a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir index 5354eb38d7b03..a4310b91a37b3 100644 --- a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \ // RUN: -split-input-file | FileCheck %s // CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 + s1)> diff --git a/mlir/test/Dialect/Affine/value-bounds-reification.mlir b/mlir/test/Dialect/Affine/value-bounds-reification.mlir index 75622f59af83b..817614be50533 100644 --- a/mlir/test/Dialect/Affine/value-bounds-reification.mlir +++ b/mlir/test/Dialect/Affine/value-bounds-reification.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args}))' \ // RUN: -verify-diagnostics -split-input-file | FileCheck %s -// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args use-arith-ops" \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args use-arith-ops}))' \ // RUN: -verify-diagnostics -split-input-file | FileCheck %s --check-prefix=CHECK-ARITH // CHECK-LABEL: func @reify_through_chain( diff --git a/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir index 8fb3ba1a1ecce..a2653d4750ec8 100644 --- a/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \ // RUN: -verify-diagnostics -split-input-file | FileCheck %s -// RUN: mlir-opt %s -test-affine-reify-value-bounds="use-arith-ops" \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{use-arith-ops}))' \ // RUN: -verify-diagnostics -split-input-file | \ // RUN: FileCheck %s --check-prefix=CHECK-ARITH diff --git a/mlir/test/Dialect/GPU/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/GPU/value-bounds-op-interface-impl.mlir new file mode 100644 index 0000000000000..6facf1e22aab9 --- /dev/null +++ b/mlir/test/Dialect/GPU/value-bounds-op-interface-impl.mlir @@ -0,0 +1,159 @@ +// RUN: mlir-opt %s -pass-pipeline='builtin.module( \ +// RUN: func.func(test-affine-reify-value-bounds), \ +// RUN: gpu.module(llvm.func(test-affine-reify-value-bounds)), \ +// RUN: gpu.module(gpu.func(test-affine-reify-value-bounds)))' \ +// RUN: -verify-diagnostics \ +// RUN: -split-input-file | FileCheck %s + +// CHECK-LABEL: func @launch_func +func.func @launch_func(%arg0 : index) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %c4 = arith.constant 4 : index + %c64 = arith.constant 64 : index + gpu.launch blocks(%block_id_x, %block_id_y, %block_id_z) in (%grid_dim_x = %arg0, %grid_dim_y = %c4, %grid_dim_z = %c2) + threads(%thread_id_x, %thread_id_y, %thread_id_z) in (%block_dim_x = %c64, %block_dim_y = %c4, %block_dim_z = %c2) { + + // Sanity checks: + // expected-error @below{{unknown}} + "test.compare" (%thread_id_x, %c1) {cmp = "EQ"} : (index, index) -> () + // expected-remark @below{{false}} + "test.compare" (%thread_id_x, %c64) {cmp = "GE"} : (index, index) -> () + + // expected-remark @below{{true}} + "test.compare" (%grid_dim_x, %c1) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare" (%grid_dim_x, %arg0) {cmp = "EQ"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare" (%grid_dim_y, %c4) {cmp = "EQ"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare" (%grid_dim_z, %c2) {cmp = "EQ"} : (index, index) -> () + + // expected-remark @below{{true}} + "test.compare"(%block_id_x, %c0) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%block_id_x, %arg0) {cmp = "LT"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%block_id_y, %c0) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%block_id_y, %c4) {cmp = "LT"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%block_id_z, %c0) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%block_id_z, %c2) {cmp = "LT"} : (index, index) -> () + + // expected-remark @below{{true}} + "test.compare" (%block_dim_x, %c64) {cmp = "EQ"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare" (%block_dim_y, %c4) {cmp = "EQ"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare" (%block_dim_z, %c2) {cmp = "EQ"} : (index, index) -> () + + // expected-remark @below{{true}} + "test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%thread_id_x, %c64) {cmp = "LT"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%thread_id_y, %c0) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%thread_id_y, %c4) {cmp = "LT"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%thread_id_z, %c0) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%thread_id_z, %c2) {cmp = "LT"} : (index, index) -> () + + // expected-remark @below{{true}} + "test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> () + gpu.terminator + } + + func.return +} + +// ----- + +// The tests for what the ranges are are located in int-range-interface.mlir, +// so here we just make sure that the results of that interface propagate into +// constraints. + +// CHECK-LABEL: func @kernel +module attributes {gpu.container_module} { + gpu.module @gpu_module { + llvm.func @kernel() attributes {gpu.kernel} { + + %c0 = arith.constant 0 : index + %ctid_max = arith.constant 4294967295 : index + %thread_id_x = gpu.thread_id x + + // expected-remark @below{{true}} + "test.compare" (%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare" (%thread_id_x, %ctid_max) {cmp = "LT"} : (index, index) -> () + llvm.return + } + } +} + +// ----- + +// CHECK-LABEL: func @annotated_kernel +module attributes {gpu.container_module} { + gpu.module @gpu_module { + gpu.func @annotated_kernel() kernel + attributes {known_block_size = array, + known_grid_size = array} { + + %c0 = arith.constant 0 : index + %c8 = arith.constant 8 : index + %thread_id_x = gpu.thread_id x + + // expected-remark @below{{true}} + "test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%thread_id_x, %c8) {cmp = "LT"} : (index, index) -> () + + %block_dim_x = gpu.block_dim x + // expected-remark @below{{true}} + "test.compare"(%block_dim_x, %c8) {cmp = "EQ"} : (index, index) -> () + + // expected-remark @below{{true}} + "test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> () + gpu.return + } + } +} + +// ----- + +// CHECK-LABEL: func @local_bounds_kernel +module attributes {gpu.container_module} { + gpu.module @gpu_module { + gpu.func @local_bounds_kernel() kernel { + + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c8 = arith.constant 8 : index + + %block_dim_x = gpu.block_dim x upper_bound 8 + // expected-remark @below{{true}} + "test.compare"(%block_dim_x, %c1) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%block_dim_x, %c8) {cmp = "LE"} : (index, index) -> () + // expected-error @below{{unknown}} + "test.compare"(%block_dim_x, %c8) {cmp = "EQ"} : (index, index) -> () + + %thread_id_x = gpu.thread_id x upper_bound 8 + // expected-remark @below{{true}} + "test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> () + // expected-remark @below{{true}} + "test.compare"(%thread_id_x, %c8) {cmp = "LT"} : (index, index) -> () + + // Note: there isn't a way to express the ID <= size constraint + // in this form + // expected-error @below{{unknown}} + "test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> () + gpu.return + } + } +} diff --git a/mlir/test/Dialect/Linalg/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Linalg/value-bounds-op-interface-impl.mlir index 189c8e649ba5e..bcd330443cc44 100644 --- a/mlir/test/Dialect/Linalg/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/Linalg/value-bounds-op-interface-impl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \ // RUN: -split-input-file | FileCheck %s // CHECK-LABEL: func @linalg_fill( diff --git a/mlir/test/Dialect/MemRef/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/MemRef/value-bounds-op-interface-impl.mlir index 0e0f216b05d48..dc311c6b59ea4 100644 --- a/mlir/test/Dialect/MemRef/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/MemRef/value-bounds-op-interface-impl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \ // RUN: -split-input-file | FileCheck %s // CHECK-LABEL: func @memref_alloc( diff --git a/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir index 65e1017e62c1a..6e0c16a9a2b33 100644 --- a/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args}))' \ // RUN: -verify-diagnostics -split-input-file | FileCheck %s // CHECK-LABEL: func @scf_for( diff --git a/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir index 0ba9983723a0a..c0f64d3c84361 100644 --- a/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \ // RUN: -split-input-file | FileCheck %s func.func @unknown_op() -> index { diff --git a/mlir/test/Dialect/Vector/test-scalable-bounds.mlir b/mlir/test/Dialect/Vector/test-scalable-bounds.mlir index 6af904beb660b..ddbd805b1cdec 100644 --- a/mlir/test/Dialect/Vector/test-scalable-bounds.mlir +++ b/mlir/test/Dialect/Vector/test-scalable-bounds.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-affine-reify-value-bounds -cse -verify-diagnostics \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds, cse))' -verify-diagnostics \ // RUN: -verify-diagnostics -split-input-file | FileCheck %s #map_dim_i = affine_map<(d0)[s0] -> (-d0 + 32400, s0)> diff --git a/mlir/test/Dialect/Vector/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Vector/value-bounds-op-interface-impl.mlir index c04c82970f9c0..1a94bbac9dff8 100644 --- a/mlir/test/Dialect/Vector/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/Vector/value-bounds-op-interface-impl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \ // RUN: -split-input-file | FileCheck %s // CHECK-LABEL: func @vector_transfer_write( diff --git a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp index 34513cd418e4c..2c954ffc4acef 100644 --- a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp +++ b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp @@ -17,6 +17,7 @@ #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Interfaces/ValueBoundsOpInterface.h" #include "mlir/Pass/Pass.h" @@ -30,7 +31,8 @@ namespace { /// This pass applies the permutation on the first maximal perfect nest. struct TestReifyValueBounds - : public PassWrapper> { + : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestReifyValueBounds) StringRef getArgument() const final { return PASS_NAME; } @@ -74,7 +76,7 @@ invertComparisonOperator(ValueBoundsConstraintSet::ComparisonOperator cmp) { /// Look for "test.reify_bound" ops in the input and replace their results with /// the reified values. -static LogicalResult testReifyValueBounds(func::FuncOp funcOp, +static LogicalResult testReifyValueBounds(FunctionOpInterface funcOp, bool reifyToFuncArgs, bool useArithOps) { IRRewriter rewriter(funcOp.getContext()); @@ -156,7 +158,7 @@ static LogicalResult testReifyValueBounds(func::FuncOp funcOp, } /// Look for "test.compare" ops and emit errors/remarks. -static LogicalResult testEquality(func::FuncOp funcOp) { +static LogicalResult testEquality(FunctionOpInterface funcOp) { IRRewriter rewriter(funcOp.getContext()); WalkResult result = funcOp.walk([&](test::CompareOp op) { auto cmpType = op.getComparisonOperator(); From 6312beef788a209dc7d73c2c10b36197dab1cff3 Mon Sep 17 00:00:00 2001 From: vporpo Date: Thu, 9 Jan 2025 11:53:48 -0800 Subject: [PATCH 47/79] [SandboxVec][BottomUpVec] Use SeedCollector and slice seeds (#120826) With this patch we switch from the temporary dummy seeds to actual seeds provided by the seed collector. The seeds get sliced and each slice is used as the starting point for vectorization. --- llvm/include/llvm/SandboxIR/Pass.h | 6 +- llvm/include/llvm/SandboxIR/Utils.h | 7 +- .../Vectorize/SandboxVectorizer/Legality.h | 1 + .../Vectorize/SandboxVectorizer/Scheduler.h | 7 ++ .../SandboxVectorizer/SeedCollector.h | 4 +- .../Vectorize/SandboxVectorizer/VecUtils.h | 10 ++ .../SandboxVectorizer/Passes/BottomUpVec.cpp | 91 +++++++++++++++---- .../SandboxVectorizer/SandboxVectorizer.cpp | 2 +- .../SandboxVectorizer/SeedCollector.cpp | 17 ++-- .../SandboxVectorizer/bottomup_basic.ll | 2 +- .../SandboxVectorizer/bottomup_seed_slice.ll | 33 +++++++ .../bottomup_seed_slice_pow2.ll | 37 ++++++++ .../SandboxVectorizer/VecUtilsTest.cpp | 11 +++ 13 files changed, 196 insertions(+), 32 deletions(-) create mode 100644 llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll create mode 100644 llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll diff --git a/llvm/include/llvm/SandboxIR/Pass.h b/llvm/include/llvm/SandboxIR/Pass.h index 4f4eae87cd3ff..267389a8a87a2 100644 --- a/llvm/include/llvm/SandboxIR/Pass.h +++ b/llvm/include/llvm/SandboxIR/Pass.h @@ -16,6 +16,7 @@ namespace llvm { class AAResults; class ScalarEvolution; +class TargetTransformInfo; namespace sandboxir { @@ -25,15 +26,18 @@ class Region; class Analyses { AAResults *AA = nullptr; ScalarEvolution *SE = nullptr; + TargetTransformInfo *TTI = nullptr; Analyses() = default; public: - Analyses(AAResults &AA, ScalarEvolution &SE) : AA(&AA), SE(&SE) {} + Analyses(AAResults &AA, ScalarEvolution &SE, TargetTransformInfo &TTI) + : AA(&AA), SE(&SE), TTI(&TTI) {} public: AAResults &getAA() const { return *AA; } ScalarEvolution &getScalarEvolution() const { return *SE; } + TargetTransformInfo &getTTI() const { return *TTI; } /// For use by unit tests. static Analyses emptyForTesting() { return Analyses(); } }; diff --git a/llvm/include/llvm/SandboxIR/Utils.h b/llvm/include/llvm/SandboxIR/Utils.h index a73498adea1d5..d58fe52214395 100644 --- a/llvm/include/llvm/SandboxIR/Utils.h +++ b/llvm/include/llvm/SandboxIR/Utils.h @@ -60,11 +60,16 @@ class Utils { getUnderlyingObject(LSI->getPointerOperand()->Val)); } + /// \Returns the number of bits of \p Ty. + static unsigned getNumBits(Type *Ty, const DataLayout &DL) { + return DL.getTypeSizeInBits(Ty->LLVMTy); + } + /// \Returns the number of bits required to represent the operands or return /// value of \p V in \p DL. static unsigned getNumBits(Value *V, const DataLayout &DL) { Type *Ty = getExpectedType(V); - return DL.getTypeSizeInBits(Ty->LLVMTy); + return getNumBits(Ty, DL); } /// \Returns the number of bits required to represent the operands or diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index 63d6ef31c8645..233cf82a1b3df 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -177,6 +177,7 @@ class LegalityAnalysis { // TODO: Try to remove the SkipScheduling argument by refactoring the tests. const LegalityResult &canVectorize(ArrayRef Bndl, bool SkipScheduling = false); + void clear() { Sched.clear(); } }; } // namespace llvm::sandboxir diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h index 9b68d47ce39aa..3ec0ac0f78a74 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h @@ -147,6 +147,13 @@ class Scheduler { ~Scheduler() {} bool trySchedule(ArrayRef Instrs); + /// Clear the scheduler's state, including the DAG. + void clear() { + Bndls.clear(); + // TODO: clear view once it lands. + DAG.clear(); + ScheduleTopItOpt = std::nullopt; + } #ifndef NDEBUG void dump(raw_ostream &OS) const; diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h index 6e16a84d832e5..73b2bdf8f181f 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h @@ -95,8 +95,8 @@ class SeedBundle { /// with a total size <= \p MaxVecRegBits, or an empty slice if the /// requirements cannot be met . If \p ForcePowOf2 is true, then the returned /// slice will have a total number of bits that is a power of 2. - MutableArrayRef - getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2); + ArrayRef getSlice(unsigned StartIdx, unsigned MaxVecRegBits, + bool ForcePowOf2); /// \Returns the number of seed elements in the bundle. std::size_t size() const { return Seeds.size(); } diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h index fc9d67fcfcdec..28fa33656dd5f 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h @@ -133,6 +133,16 @@ class VecUtils { assert(tryGetCommonScalarType(Bndl) && "Expected common scalar type!"); return ScalarTy; } + /// \Returns the first integer power of 2 that is <= Num. + static unsigned getFloorPowerOf2(unsigned Num) { + if (Num == 0) + return Num; + unsigned Mask = Num; + Mask >>= 1; + for (unsigned ShiftBy = 1; ShiftBy < sizeof(Num) * 8; ShiftBy <<= 1) + Mask |= Mask >> ShiftBy; + return Num & ~Mask; + } }; } // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index a2ea11be59b8e..18e072c17d202 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -8,29 +8,31 @@ #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/SandboxIR/Function.h" #include "llvm/SandboxIR/Instruction.h" #include "llvm/SandboxIR/Module.h" #include "llvm/SandboxIR/Utils.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" -namespace llvm::sandboxir { +namespace llvm { + +static cl::opt + OverrideVecRegBits("sbvec-vec-reg-bits", cl::init(0), cl::Hidden, + cl::desc("Override the vector register size in bits, " + "which is otherwise found by querying TTI.")); +static cl::opt + AllowNonPow2("sbvec-allow-non-pow2", cl::init(false), cl::Hidden, + cl::desc("Allow non-power-of-2 vectorization.")); + +namespace sandboxir { BottomUpVec::BottomUpVec(StringRef Pipeline) : FunctionPass("bottom-up-vec"), RPM("rpm", Pipeline, SandboxVectorizerPassBuilder::createRegionPass) {} -// TODO: This is a temporary function that returns some seeds. -// Replace this with SeedCollector's function when it lands. -static llvm::SmallVector collectSeeds(BasicBlock &BB) { - llvm::SmallVector Seeds; - for (auto &I : BB) - if (auto *SI = llvm::dyn_cast(&I)) - Seeds.push_back(SI); - return Seeds; -} - static SmallVector getOperand(ArrayRef Bndl, unsigned OpIdx) { SmallVector Operands; @@ -265,6 +267,7 @@ Value *BottomUpVec::vectorizeRec(ArrayRef Bndl, unsigned Depth) { bool BottomUpVec::tryVectorize(ArrayRef Bndl) { DeadInstrCandidates.clear(); + Legality->clear(); vectorizeRec(Bndl, /*Depth=*/0); tryEraseDeadInstrs(); return Change; @@ -275,17 +278,67 @@ bool BottomUpVec::runOnFunction(Function &F, const Analyses &A) { A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(), F.getContext()); Change = false; + const auto &DL = F.getParent()->getDataLayout(); + unsigned VecRegBits = + OverrideVecRegBits != 0 + ? OverrideVecRegBits + : A.getTTI() + .getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) + .getFixedValue(); + // TODO: Start from innermost BBs first for (auto &BB : F) { - // TODO: Replace with proper SeedCollector function. - auto Seeds = collectSeeds(BB); - // TODO: Slice Seeds into smaller chunks. - // TODO: If vectorization succeeds, run the RegionPassManager on the - // resulting region. - if (Seeds.size() >= 2) - Change |= tryVectorize(Seeds); + SeedCollector SC(&BB, A.getScalarEvolution()); + for (SeedBundle &Seeds : SC.getStoreSeeds()) { + unsigned ElmBits = + Utils::getNumBits(VecUtils::getElementType(Utils::getExpectedType( + Seeds[Seeds.getFirstUnusedElementIdx()])), + DL); + + auto DivideBy2 = [](unsigned Num) { + auto Floor = VecUtils::getFloorPowerOf2(Num); + if (Floor == Num) + return Floor / 2; + return Floor; + }; + // Try to create the largest vector supported by the target. If it fails + // reduce the vector size by half. + for (unsigned SliceElms = std::min(VecRegBits / ElmBits, + Seeds.getNumUnusedBits() / ElmBits); + SliceElms >= 2u; SliceElms = DivideBy2(SliceElms)) { + if (Seeds.allUsed()) + break; + // Keep trying offsets after FirstUnusedElementIdx, until we vectorize + // the slice. This could be quite expensive, so we enforce a limit. + for (unsigned Offset = Seeds.getFirstUnusedElementIdx(), + OE = Seeds.size(); + Offset + 1 < OE; Offset += 1) { + // Seeds are getting used as we vectorize, so skip them. + if (Seeds.isUsed(Offset)) + continue; + if (Seeds.allUsed()) + break; + + auto SeedSlice = + Seeds.getSlice(Offset, SliceElms * ElmBits, !AllowNonPow2); + if (SeedSlice.empty()) + continue; + + assert(SeedSlice.size() >= 2 && "Should have been rejected!"); + + // TODO: If vectorization succeeds, run the RegionPassManager on the + // resulting region. + + // TODO: Refactor to remove the unnecessary copy to SeedSliceVals. + SmallVector SeedSliceVals(SeedSlice.begin(), + SeedSlice.end()); + Change |= tryVectorize(SeedSliceVals); + } + } + } } return Change; } -} // namespace llvm::sandboxir +} // namespace sandboxir +} // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp index c22eb01d74a1c..a6e2b40000529 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp @@ -86,6 +86,6 @@ bool SandboxVectorizerPass::runImpl(Function &LLVMF) { // Create SandboxIR for LLVMF and run BottomUpVec on it. sandboxir::Function &F = *Ctx->createFunction(&LLVMF); - sandboxir::Analyses A(*AA, *SE); + sandboxir::Analyses A(*AA, *SE, *TTI); return FPM.runOnFunction(F, A); } diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp index 6ea34c5e0598d..a3ce663407c4a 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp @@ -31,9 +31,9 @@ cl::opt SeedGroupsLimit( cl::desc("Limit the number of collected seeds groups in a BB to " "cap compilation time.")); -MutableArrayRef SeedBundle::getSlice(unsigned StartIdx, - unsigned MaxVecRegBits, - bool ForcePowerOf2) { +ArrayRef SeedBundle::getSlice(unsigned StartIdx, + unsigned MaxVecRegBits, + bool ForcePowerOf2) { // Use uint32_t here for compatibility with IsPowerOf2_32 // BitCount tracks the size of the working slice. From that we can tell @@ -47,10 +47,13 @@ MutableArrayRef SeedBundle::getSlice(unsigned StartIdx, // Can't start a slice with a used instruction. assert(!isUsed(StartIdx) && "Expected unused at StartIdx"); for (auto S : make_range(Seeds.begin() + StartIdx, Seeds.end())) { + // Stop if this instruction is used. This needs to be done before + // getNumBits() because a "used" instruction may have been erased. + if (isUsed(StartIdx + NumElements)) + break; uint32_t InstBits = Utils::getNumBits(S); - // Stop if this instruction is used, or if adding it puts the slice over - // the limit. - if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits) + // Stop if adding it puts the slice over the limit. + if (BitCount + InstBits > MaxVecRegBits) break; NumElements++; BitCount += InstBits; @@ -68,7 +71,7 @@ MutableArrayRef SeedBundle::getSlice(unsigned StartIdx, "Must be a power of two"); // Return any non-empty slice if (NumElements > 1) - return MutableArrayRef(&Seeds[StartIdx], NumElements); + return ArrayRef(&Seeds[StartIdx], NumElements); else return {}; } diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll index 7422d287ff3e2..785d1f4ef666f 100644 --- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll +++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=sandbox-vectorizer -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s define void @store_load(ptr %ptr) { ; CHECK-LABEL: define void @store_load( diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll new file mode 100644 index 0000000000000..46cda3c80aaa3 --- /dev/null +++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s + + +declare void @foo() +define void @slice_seeds(ptr %ptr, float %val) { +; CHECK-LABEL: define void @slice_seeds( +; CHECK-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1 +; CHECK-NEXT: [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2 +; CHECK-NEXT: [[LD2:%.*]] = load float, ptr [[PTR2]], align 4 +; CHECK-NEXT: store float [[LD2]], ptr [[PTR2]], align 4 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4 +; CHECK-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4 +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr float, ptr %ptr, i32 0 + %ptr1 = getelementptr float, ptr %ptr, i32 1 + %ptr2 = getelementptr float, ptr %ptr, i32 2 + + %ld2 = load float, ptr %ptr2 + store float %ld2, ptr %ptr2 + ; This call blocks scheduling of all 3 stores. + call void @foo() + + %ld0 = load float, ptr %ptr0 + %ld1 = load float, ptr %ptr1 + store float %ld0, ptr %ptr0 + store float %ld1, ptr %ptr1 + ret void +} diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll new file mode 100644 index 0000000000000..22119c4491b92 --- /dev/null +++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=false -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s --check-prefix=POW2 +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=true -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s --check-prefix=NON-POW2 + +define void @pow2(ptr %ptr, float %val) { +; POW2-LABEL: define void @pow2( +; POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) { +; POW2-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0 +; POW2-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1 +; POW2-NEXT: [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2 +; POW2-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4 +; POW2-NEXT: [[LD2:%.*]] = load float, ptr [[PTR2]], align 4 +; POW2-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4 +; POW2-NEXT: store float [[LD2]], ptr [[PTR2]], align 4 +; POW2-NEXT: ret void +; +; NON-POW2-LABEL: define void @pow2( +; NON-POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) { +; NON-POW2-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0 +; NON-POW2-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1 +; NON-POW2-NEXT: [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2 +; NON-POW2-NEXT: [[PACK2:%.*]] = load <3 x float>, ptr [[PTR0]], align 4 +; NON-POW2-NEXT: store <3 x float> [[PACK2]], ptr [[PTR0]], align 4 +; NON-POW2-NEXT: ret void +; + %ptr0 = getelementptr float, ptr %ptr, i32 0 + %ptr1 = getelementptr float, ptr %ptr, i32 1 + %ptr2 = getelementptr float, ptr %ptr, i32 2 + + %ld0 = load float, ptr %ptr0 + %ld1 = load float, ptr %ptr1 + %ld2 = load float, ptr %ptr2 + store float %ld0, ptr %ptr0 + store float %ld1, ptr %ptr1 + store float %ld2, ptr %ptr2 + ret void +} diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp index cf7b6cbc7e55c..8661dcd5067c0 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp @@ -472,3 +472,14 @@ define void @foo(i8 %v, ptr %ptr) { #endif // NDEBUG } } + +TEST_F(VecUtilsTest, FloorPowerOf2) { + EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(0), 0u); + EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(1 << 0), 1u << 0); + EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(3), 2u); + EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(4), 4u); + EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(5), 4u); + EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(7), 4u); + EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(8), 8u); + EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(9), 8u); +} From 1739ba9bb5b5e0b2f3c7b381f7852b77a53ef3ba Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 9 Jan 2025 11:57:11 -0800 Subject: [PATCH 48/79] [OpenMP][FIX] Adjust test to be non-flaky (#122331) The test runs asynchronous kernels and depending on the timing the output is slightly different. We now only check for the common parts of the output. --- offload/test/sanitizer/kernel_crash_async.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/test/sanitizer/kernel_crash_async.c b/offload/test/sanitizer/kernel_crash_async.c index ee22ba504018b..d51418f817889 100644 --- a/offload/test/sanitizer/kernel_crash_async.c +++ b/offload/test/sanitizer/kernel_crash_async.c @@ -36,4 +36,4 @@ int main(void) { // TRACE: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l29) // TRACE: launchKernel // -// CHECK: Kernel {{[0-9]}}: {{.*}} (__omp_offloading_{{.*}}_main_l29) +// CHECK: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l29) From b57c0bac81fe4f5c85c6554ca574cf2d5648e0c5 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 9 Jan 2025 13:57:39 -0600 Subject: [PATCH 49/79] [OpenMP] Update atomic helpers to just use headers (#122185) Summary: Previously we had some indirection here, this patch updates these utilities to just be normal template functions. We use SFINAE to manage the special case handling for floats. Also this strips address spaces so it can be used more generally. --- offload/DeviceRTL/CMakeLists.txt | 2 +- offload/DeviceRTL/include/DeviceUtils.h | 41 +++++ offload/DeviceRTL/include/Synchronization.h | 169 ++++++++++++++----- offload/DeviceRTL/src/Synchronization.cpp | 175 +------------------- 4 files changed, 170 insertions(+), 217 deletions(-) diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt index 22940264f9b19..099634e211e7a 100644 --- a/offload/DeviceRTL/CMakeLists.txt +++ b/offload/DeviceRTL/CMakeLists.txt @@ -99,7 +99,7 @@ set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden ${clang_opt_flags} --offload-device-only -nocudalib -nogpulib -nogpuinc -nostdlibinc -fopenmp -fopenmp-cuda-mode - -Wno-unknown-cuda-version + -Wno-unknown-cuda-version -Wno-openmp-target -DOMPTARGET_DEVICE_RUNTIME -I${include_directory} -I${devicertl_base_directory}/../include diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h index 549ca16e1c34c..fb00d6c755255 100644 --- a/offload/DeviceRTL/include/DeviceUtils.h +++ b/offload/DeviceRTL/include/DeviceUtils.h @@ -19,6 +19,47 @@ namespace utils { +template struct type_identity { + using type = T; +}; + +template struct integral_constant { + inline static constexpr T value = v; +}; + +/// Freestanding SFINAE helpers. +template struct remove_cv : type_identity {}; +template struct remove_cv : type_identity {}; +template struct remove_cv : type_identity {}; +template struct remove_cv : type_identity {}; +template using remove_cv_t = typename remove_cv::type; + +using true_type = integral_constant; +using false_type = integral_constant; + +template struct is_same : false_type {}; +template struct is_same : true_type {}; +template +inline constexpr bool is_same_v = is_same::value; + +template struct is_floating_point { + inline static constexpr bool value = + is_same_v, float> || is_same_v, double>; +}; +template +inline constexpr bool is_floating_point_v = is_floating_point::value; + +template struct enable_if; +template struct enable_if : type_identity {}; +template +using enable_if_t = typename enable_if::type; + +template struct remove_addrspace : type_identity {}; +template +struct remove_addrspace : type_identity {}; +template +using remove_addrspace_t = typename remove_addrspace::type; + /// Return the value \p Var from thread Id \p SrcLane in the warp if the thread /// is identified by \p Mask. int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width); diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h index 7a73f9ba72877..ae065850d824c 100644 --- a/offload/DeviceRTL/include/Synchronization.h +++ b/offload/DeviceRTL/include/Synchronization.h @@ -13,9 +13,11 @@ #define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H #include "DeviceTypes.h" +#include "DeviceUtils.h" -namespace ompx { +#pragma omp begin declare target device_type(nohost) +namespace ompx { namespace atomic { enum OrderingTy { @@ -48,51 +50,124 @@ uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering, /// result is stored in \p *Addr; /// { -#define ATOMIC_COMMON_OP(TY) \ - TY add(TY *Addr, TY V, OrderingTy Ordering); \ - TY mul(TY *Addr, TY V, OrderingTy Ordering); \ - TY load(TY *Addr, OrderingTy Ordering); \ - void store(TY *Addr, TY V, OrderingTy Ordering); \ - bool cas(TY *Addr, TY ExpectedV, TY DesiredV, OrderingTy OrderingSucc, \ - OrderingTy OrderingFail); - -#define ATOMIC_FP_ONLY_OP(TY) \ - TY min(TY *Addr, TY V, OrderingTy Ordering); \ - TY max(TY *Addr, TY V, OrderingTy Ordering); - -#define ATOMIC_INT_ONLY_OP(TY) \ - TY min(TY *Addr, TY V, OrderingTy Ordering); \ - TY max(TY *Addr, TY V, OrderingTy Ordering); \ - TY bit_or(TY *Addr, TY V, OrderingTy Ordering); \ - TY bit_and(TY *Addr, TY V, OrderingTy Ordering); \ - TY bit_xor(TY *Addr, TY V, OrderingTy Ordering); - -#define ATOMIC_FP_OP(TY) \ - ATOMIC_FP_ONLY_OP(TY) \ - ATOMIC_COMMON_OP(TY) - -#define ATOMIC_INT_OP(TY) \ - ATOMIC_INT_ONLY_OP(TY) \ - ATOMIC_COMMON_OP(TY) - -// This needs to be kept in sync with the header. Also the reason we don't use -// templates here. -ATOMIC_INT_OP(int8_t) -ATOMIC_INT_OP(int16_t) -ATOMIC_INT_OP(int32_t) -ATOMIC_INT_OP(int64_t) -ATOMIC_INT_OP(uint8_t) -ATOMIC_INT_OP(uint16_t) -ATOMIC_INT_OP(uint32_t) -ATOMIC_INT_OP(uint64_t) -ATOMIC_FP_OP(float) -ATOMIC_FP_OP(double) - -#undef ATOMIC_INT_ONLY_OP -#undef ATOMIC_FP_ONLY_OP -#undef ATOMIC_COMMON_OP -#undef ATOMIC_INT_OP -#undef ATOMIC_FP_OP +template > +bool cas(Ty *Address, V ExpectedV, V DesiredV, atomic::OrderingTy OrderingSucc, + atomic::OrderingTy OrderingFail) { + return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false, + OrderingSucc, OrderingFail, + __MEMORY_SCOPE_DEVICE); +} + +template > +V add(Ty *Address, V Val, atomic::OrderingTy Ordering) { + return __scoped_atomic_fetch_add(Address, Val, Ordering, + __MEMORY_SCOPE_DEVICE); +} + +template > +V load(Ty *Address, atomic::OrderingTy Ordering) { + return add(Address, Ty(0), Ordering); +} + +template > +void store(Ty *Address, V Val, atomic::OrderingTy Ordering) { + __scoped_atomic_store_n(Address, Val, Ordering, __MEMORY_SCOPE_DEVICE); +} + +template > +V mul(Ty *Address, V Val, atomic::OrderingTy Ordering) { + Ty TypedCurrentVal, TypedResultVal, TypedNewVal; + bool Success; + do { + TypedCurrentVal = atomic::load(Address, Ordering); + TypedNewVal = TypedCurrentVal * Val; + Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering, + atomic::relaxed); + } while (!Success); + return TypedResultVal; +} + +template > +utils::enable_if_t, V> +max(Ty *Address, V Val, atomic::OrderingTy Ordering) { + return __scoped_atomic_fetch_max(Address, Val, Ordering, + __MEMORY_SCOPE_DEVICE); +} + +template > +utils::enable_if_t, V> +max(Ty *Address, V Val, atomic::OrderingTy Ordering) { + if (Val >= 0) + return utils::convertViaPun( + max((int32_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::convertViaPun( + min((uint32_t *)Address, utils::convertViaPun(Val), Ordering)); +} + +template > +utils::enable_if_t, V> +max(Ty *Address, V Val, atomic::OrderingTy Ordering) { + if (Val >= 0) + return utils::convertViaPun( + max((int64_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::convertViaPun( + min((uint64_t *)Address, utils::convertViaPun(Val), Ordering)); +} + +template > +utils::enable_if_t, V> +min(Ty *Address, V Val, atomic::OrderingTy Ordering) { + return __scoped_atomic_fetch_min(Address, Val, Ordering, + __MEMORY_SCOPE_DEVICE); +} + +// TODO: Implement this with __atomic_fetch_max and remove the duplication. +template > +utils::enable_if_t, V> +min(Ty *Address, V Val, atomic::OrderingTy Ordering) { + if (Val >= 0) + return utils::convertViaPun( + min((int32_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::convertViaPun( + max((uint32_t *)Address, utils::convertViaPun(Val), Ordering)); +} + +// TODO: Implement this with __atomic_fetch_max and remove the duplication. +template > +utils::enable_if_t, V> +min(Ty *Address, utils::remove_addrspace_t Val, + atomic::OrderingTy Ordering) { + if (Val >= 0) + return utils::convertViaPun( + min((int64_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::convertViaPun( + max((uint64_t *)Address, utils::convertViaPun(Val), Ordering)); +} + +template > +V bit_or(Ty *Address, V Val, atomic::OrderingTy Ordering) { + return __scoped_atomic_fetch_or(Address, Val, Ordering, + __MEMORY_SCOPE_DEVICE); +} + +template > +V bit_and(Ty *Address, V Val, atomic::OrderingTy Ordering) { + return __scoped_atomic_fetch_and(Address, Val, Ordering, + __MEMORY_SCOPE_DEVICE); +} + +template > +V bit_xor(Ty *Address, V Val, atomic::OrderingTy Ordering) { + return __scoped_atomic_fetch_xor(Address, Val, Ordering, + __MEMORY_SCOPE_DEVICE); +} + +static inline uint32_t atomicExchange(uint32_t *Address, uint32_t Val, + atomic::OrderingTy Ordering) { + uint32_t R; + __scoped_atomic_exchange(Address, &Val, &R, Ordering, __MEMORY_SCOPE_DEVICE); + return R; +} ///} @@ -145,4 +220,6 @@ void system(atomic::OrderingTy Ordering); } // namespace ompx +#pragma omp end declare target + #endif diff --git a/offload/DeviceRTL/src/Synchronization.cpp b/offload/DeviceRTL/src/Synchronization.cpp index 3aee23a865d3c..72a97ae3fcfb4 100644 --- a/offload/DeviceRTL/src/Synchronization.cpp +++ b/offload/DeviceRTL/src/Synchronization.cpp @@ -31,95 +31,6 @@ namespace impl { /// NOTE: This function needs to be implemented by every target. uint32_t atomicInc(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering, atomic::MemScopeTy MemScope); - -template -Ty atomicAdd(Ty *Address, Ty Val, atomic::OrderingTy Ordering) { - return __scoped_atomic_fetch_add(Address, Val, Ordering, - __MEMORY_SCOPE_DEVICE); -} - -template -Ty atomicMul(Ty *Address, Ty V, atomic::OrderingTy Ordering) { - Ty TypedCurrentVal, TypedResultVal, TypedNewVal; - bool Success; - do { - TypedCurrentVal = atomic::load(Address, Ordering); - TypedNewVal = TypedCurrentVal * V; - Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering, - atomic::relaxed); - } while (!Success); - return TypedResultVal; -} - -template Ty atomicLoad(Ty *Address, atomic::OrderingTy Ordering) { - return atomicAdd(Address, Ty(0), Ordering); -} - -template -void atomicStore(Ty *Address, Ty Val, atomic::OrderingTy Ordering) { - __scoped_atomic_store_n(Address, Val, Ordering, __MEMORY_SCOPE_DEVICE); -} - -template -bool atomicCAS(Ty *Address, Ty ExpectedV, Ty DesiredV, - atomic::OrderingTy OrderingSucc, - atomic::OrderingTy OrderingFail) { - return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false, - OrderingSucc, OrderingFail, - __MEMORY_SCOPE_DEVICE); -} - -template -Ty atomicMin(Ty *Address, Ty Val, atomic::OrderingTy Ordering) { - return __scoped_atomic_fetch_min(Address, Val, Ordering, - __MEMORY_SCOPE_DEVICE); -} - -template -Ty atomicMax(Ty *Address, Ty Val, atomic::OrderingTy Ordering) { - return __scoped_atomic_fetch_max(Address, Val, Ordering, - __MEMORY_SCOPE_DEVICE); -} - -// TODO: Implement this with __atomic_fetch_max and remove the duplication. -template -Ty atomicMinFP(Ty *Address, Ty Val, atomic::OrderingTy Ordering) { - if (Val >= 0) - return atomicMin((STy *)Address, utils::convertViaPun(Val), Ordering); - return atomicMax((UTy *)Address, utils::convertViaPun(Val), Ordering); -} - -template -Ty atomicMaxFP(Ty *Address, Ty Val, atomic::OrderingTy Ordering) { - if (Val >= 0) - return atomicMax((STy *)Address, utils::convertViaPun(Val), Ordering); - return atomicMin((UTy *)Address, utils::convertViaPun(Val), Ordering); -} - -template -Ty atomicOr(Ty *Address, Ty Val, atomic::OrderingTy Ordering) { - return __scoped_atomic_fetch_or(Address, Val, Ordering, - __MEMORY_SCOPE_DEVICE); -} - -template -Ty atomicAnd(Ty *Address, Ty Val, atomic::OrderingTy Ordering) { - return __scoped_atomic_fetch_and(Address, Val, Ordering, - __MEMORY_SCOPE_DEVICE); -} - -template -Ty atomicXOr(Ty *Address, Ty Val, atomic::OrderingTy Ordering) { - return __scoped_atomic_fetch_xor(Address, Val, Ordering, - __MEMORY_SCOPE_DEVICE); -} - -uint32_t atomicExchange(uint32_t *Address, uint32_t Val, - atomic::OrderingTy Ordering) { - uint32_t R; - __scoped_atomic_exchange(Address, &Val, &R, Ordering, __MEMORY_SCOPE_DEVICE); - return R; -} ///} // Forward declarations defined to be defined for AMDGCN and NVPTX. @@ -279,8 +190,8 @@ void setCriticalLock(omp_lock_t *Lock) { uint64_t LowestActiveThread = utils::ffs(mapping::activemask()) - 1; if (mapping::getThreadIdInWarp() == LowestActiveThread) { fenceKernel(atomic::release); - while (!atomicCAS((uint32_t *)Lock, UNSET, SET, atomic::relaxed, - atomic::relaxed)) { + while ( + !cas((uint32_t *)Lock, UNSET, SET, atomic::relaxed, atomic::relaxed)) { __builtin_amdgcn_s_sleep(32); } fenceKernel(atomic::aquire); @@ -341,7 +252,7 @@ void unsetLock(omp_lock_t *Lock) { } int testLock(omp_lock_t *Lock) { - return atomicAdd((uint32_t *)Lock, 0u, atomic::seq_cst); + return atomic::add((uint32_t *)Lock, 0u, atomic::seq_cst); } void initLock(omp_lock_t *Lock) { unsetLock(Lock); } @@ -350,8 +261,8 @@ void destroyLock(omp_lock_t *Lock) { unsetLock(Lock); } void setLock(omp_lock_t *Lock) { // TODO: not sure spinning is a good idea here.. - while (atomicCAS((uint32_t *)Lock, UNSET, SET, atomic::seq_cst, - atomic::seq_cst) != UNSET) { + while (atomic::cas((uint32_t *)Lock, UNSET, SET, atomic::seq_cst, + atomic::seq_cst) != UNSET) { int32_t start = __nvvm_read_ptx_sreg_clock(); int32_t now; for (;;) { @@ -394,82 +305,6 @@ void fence::kernel(atomic::OrderingTy Ordering) { impl::fenceKernel(Ordering); } void fence::system(atomic::OrderingTy Ordering) { impl::fenceSystem(Ordering); } -#define ATOMIC_COMMON_OP(TY) \ - TY atomic::add(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - return impl::atomicAdd(Addr, V, Ordering); \ - } \ - TY atomic::mul(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - return impl::atomicMul(Addr, V, Ordering); \ - } \ - TY atomic::load(TY *Addr, atomic::OrderingTy Ordering) { \ - return impl::atomicLoad(Addr, Ordering); \ - } \ - bool atomic::cas(TY *Addr, TY ExpectedV, TY DesiredV, \ - atomic::OrderingTy OrderingSucc, \ - atomic::OrderingTy OrderingFail) { \ - return impl::atomicCAS(Addr, ExpectedV, DesiredV, OrderingSucc, \ - OrderingFail); \ - } - -#define ATOMIC_FP_ONLY_OP(TY, STY, UTY) \ - TY atomic::min(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - return impl::atomicMinFP(Addr, V, Ordering); \ - } \ - TY atomic::max(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - return impl::atomicMaxFP(Addr, V, Ordering); \ - } \ - void atomic::store(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - impl::atomicStore(reinterpret_cast(Addr), \ - utils::convertViaPun(V), Ordering); \ - } - -#define ATOMIC_INT_ONLY_OP(TY) \ - TY atomic::min(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - return impl::atomicMin(Addr, V, Ordering); \ - } \ - TY atomic::max(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - return impl::atomicMax(Addr, V, Ordering); \ - } \ - TY atomic::bit_or(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - return impl::atomicOr(Addr, V, Ordering); \ - } \ - TY atomic::bit_and(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - return impl::atomicAnd(Addr, V, Ordering); \ - } \ - TY atomic::bit_xor(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - return impl::atomicXOr(Addr, V, Ordering); \ - } \ - void atomic::store(TY *Addr, TY V, atomic::OrderingTy Ordering) { \ - impl::atomicStore(Addr, V, Ordering); \ - } - -#define ATOMIC_FP_OP(TY, STY, UTY) \ - ATOMIC_FP_ONLY_OP(TY, STY, UTY) \ - ATOMIC_COMMON_OP(TY) - -#define ATOMIC_INT_OP(TY) \ - ATOMIC_INT_ONLY_OP(TY) \ - ATOMIC_COMMON_OP(TY) - -// This needs to be kept in sync with the header. Also the reason we don't use -// templates here. -ATOMIC_INT_OP(int8_t) -ATOMIC_INT_OP(int16_t) -ATOMIC_INT_OP(int32_t) -ATOMIC_INT_OP(int64_t) -ATOMIC_INT_OP(uint8_t) -ATOMIC_INT_OP(uint16_t) -ATOMIC_INT_OP(uint32_t) -ATOMIC_INT_OP(uint64_t) -ATOMIC_FP_OP(float, int32_t, uint32_t) -ATOMIC_FP_OP(double, int64_t, uint64_t) - -#undef ATOMIC_INT_ONLY_OP -#undef ATOMIC_FP_ONLY_OP -#undef ATOMIC_COMMON_OP -#undef ATOMIC_INT_OP -#undef ATOMIC_FP_OP - uint32_t atomic::inc(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering, atomic::MemScopeTy MemScope) { return impl::atomicInc(Addr, V, Ordering, MemScope); From f53cb84df6b80458cb4d5ab7398a590356a3a952 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 9 Jan 2025 13:59:21 -0600 Subject: [PATCH 50/79] [OpenMP] Use __builtin_bit_cast instead of UB type punning (#122325) Summary: Use a normal bitcast, remove from the shared utils since it's not available in GCC 7.4 --- offload/DeviceRTL/include/DeviceUtils.h | 5 ++++ offload/DeviceRTL/include/Synchronization.h | 32 ++++++++++----------- offload/DeviceRTL/src/Mapping.cpp | 8 +++--- offload/include/Shared/Utils.h | 5 ---- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h index fb00d6c755255..fa66b973a4f5e 100644 --- a/offload/DeviceRTL/include/DeviceUtils.h +++ b/offload/DeviceRTL/include/DeviceUtils.h @@ -60,6 +60,11 @@ struct remove_addrspace : type_identity {}; template using remove_addrspace_t = typename remove_addrspace::type; +template inline To bitCast(From V) { + static_assert(sizeof(To) == sizeof(From), "Bad conversion"); + return __builtin_bit_cast(To, V); +} + /// Return the value \p Var from thread Id \p SrcLane in the warp if the thread /// is identified by \p Mask. int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width); diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h index ae065850d824c..e1968675550d4 100644 --- a/offload/DeviceRTL/include/Synchronization.h +++ b/offload/DeviceRTL/include/Synchronization.h @@ -98,20 +98,20 @@ template > utils::enable_if_t, V> max(Ty *Address, V Val, atomic::OrderingTy Ordering) { if (Val >= 0) - return utils::convertViaPun( - max((int32_t *)Address, utils::convertViaPun(Val), Ordering)); - return utils::convertViaPun( - min((uint32_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::bitCast( + max((int32_t *)Address, utils::bitCast(Val), Ordering)); + return utils::bitCast( + min((uint32_t *)Address, utils::bitCast(Val), Ordering)); } template > utils::enable_if_t, V> max(Ty *Address, V Val, atomic::OrderingTy Ordering) { if (Val >= 0) - return utils::convertViaPun( - max((int64_t *)Address, utils::convertViaPun(Val), Ordering)); - return utils::convertViaPun( - min((uint64_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::bitCast( + max((int64_t *)Address, utils::bitCast(Val), Ordering)); + return utils::bitCast( + min((uint64_t *)Address, utils::bitCast(Val), Ordering)); } template > @@ -126,10 +126,10 @@ template > utils::enable_if_t, V> min(Ty *Address, V Val, atomic::OrderingTy Ordering) { if (Val >= 0) - return utils::convertViaPun( - min((int32_t *)Address, utils::convertViaPun(Val), Ordering)); - return utils::convertViaPun( - max((uint32_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::bitCast( + min((int32_t *)Address, utils::bitCast(Val), Ordering)); + return utils::bitCast( + max((uint32_t *)Address, utils::bitCast(Val), Ordering)); } // TODO: Implement this with __atomic_fetch_max and remove the duplication. @@ -138,10 +138,10 @@ utils::enable_if_t, V> min(Ty *Address, utils::remove_addrspace_t Val, atomic::OrderingTy Ordering) { if (Val >= 0) - return utils::convertViaPun( - min((int64_t *)Address, utils::convertViaPun(Val), Ordering)); - return utils::convertViaPun( - max((uint64_t *)Address, utils::convertViaPun(Val), Ordering)); + return utils::bitCast( + min((int64_t *)Address, utils::bitCast(Val), Ordering)); + return utils::bitCast( + max((uint64_t *)Address, utils::bitCast(Val), Ordering)); } template > diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp index 881bd12f03405..8583a539824c8 100644 --- a/offload/DeviceRTL/src/Mapping.cpp +++ b/offload/DeviceRTL/src/Mapping.cpp @@ -371,8 +371,8 @@ int ompx_shfl_down_sync_i(uint64_t mask, int var, unsigned delta, int width) { float ompx_shfl_down_sync_f(uint64_t mask, float var, unsigned delta, int width) { - return utils::convertViaPun(utils::shuffleDown( - mask, utils::convertViaPun(var), delta, width)); + return utils::bitCast( + utils::shuffleDown(mask, utils::bitCast(var), delta, width)); } long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width) { @@ -381,8 +381,8 @@ long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width) { double ompx_shfl_down_sync_d(uint64_t mask, double var, unsigned delta, int width) { - return utils::convertViaPun(utils::shuffleDown( - mask, utils::convertViaPun(var), delta, width)); + return utils::bitCast( + utils::shuffleDown(mask, utils::bitCast(var), delta, width)); } } diff --git a/offload/include/Shared/Utils.h b/offload/include/Shared/Utils.h index 83a82678312c1..523e6bc505b81 100644 --- a/offload/include/Shared/Utils.h +++ b/offload/include/Shared/Utils.h @@ -68,11 +68,6 @@ inline uint32_t popc(uint64_t V) { return __builtin_popcountl(V); } -template inline DstTy convertViaPun(SrcTy V) { - static_assert(sizeof(DstTy) == sizeof(SrcTy), "Bad conversion"); - return *((DstTy *)(&V)); -} - } // namespace utils #endif // OMPTARGET_SHARED_UTILS_H From 3055e86c719c6cc72e50bb74a0e3b9cffebb41b5 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Thu, 9 Jan 2025 12:01:43 -0800 Subject: [PATCH 51/79] [MemProf] Disable cloning of callsites in recursive cycles by default (#122354) This disables the support added in PR121985 by default while we investigate a compile time crash. --- .../IPO/MemProfContextDisambiguation.cpp | 4 ++-- llvm/test/ThinLTO/X86/memprof-recursive.ll | 19 ++++++++++++++++++- .../MemProfContextDisambiguation/recursive.ll | 14 +++++++++++++- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 016db55c99c3e..3d0e5cb5f97b4 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -122,9 +122,9 @@ static cl::opt cl::desc("Max depth to recursively search for missing " "frames through tail calls.")); -// By default enable cloning of callsites involved with recursive cycles +// Optionally enable cloning of callsites involved with recursive cycles static cl::opt AllowRecursiveCallsites( - "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, + "memprof-allow-recursive-callsites", cl::init(false), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles")); // When disabled, try to detect and prevent cloning of recursive contexts. diff --git a/llvm/test/ThinLTO/X86/memprof-recursive.ll b/llvm/test/ThinLTO/X86/memprof-recursive.ll index 2b1d7081b7610..1f4f75460b070 100644 --- a/llvm/test/ThinLTO/X86/memprof-recursive.ll +++ b/llvm/test/ThinLTO/X86/memprof-recursive.ll @@ -5,7 +5,7 @@ ; RUN: opt -thinlto-bc %s >%t.o -;; By default we should enable cloning of contexts involved with recursive +;; Check behavior when we enable cloning of contexts involved with recursive ;; cycles, but not through the cycle itself. I.e. until full support for ;; recursion is added, the cloned recursive call from C back to B (line 12) will ;; not be updated to call a clone. @@ -18,6 +18,7 @@ ; RUN: -r=%t.o,_Znam, \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -memprof-allow-recursive-callsites=true \ ; RUN: -o %t.out 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ ; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS @@ -38,6 +39,21 @@ ; RUN: --implicit-check-not="created clone" \ ; RUN: --implicit-check-not="marked with memprof allocation attribute cold" +;; Check the default behavior (disabled recursive callsites). +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,_Z1Dv,plx \ +; RUN: -r=%t.o,_Z1Ci,plx \ +; RUN: -r=%t.o,_Z1Bi,plx \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \ +; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ +; RUN: --implicit-check-not="created clone" \ +; RUN: --implicit-check-not="marked with memprof allocation attribute cold" + ;; Skipping recursive contexts should prevent spurious call to cloned version of ;; B from the context starting at memprof_recursive.cc:19:13, which is actually ;; recursive (until that support is added). @@ -50,6 +66,7 @@ ; RUN: -r=%t.o,_Znam, \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -memprof-allow-recursive-callsites=true \ ; RUN: -memprof-allow-recursive-contexts=false \ ; RUN: -o %t.out 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll index 759d5115896c1..fd87cb535c42e 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll @@ -34,13 +34,14 @@ ;; ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. -;; By default we should enable cloning of contexts involved with recursive +;; Check behavior when we enable cloning of contexts involved with recursive ;; cycles, but not through the cycle itself. I.e. until full support for ;; recursion is added, the cloned recursive call from C back to B (line 12) will ;; not be updated to call a clone. ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -memprof-allow-recursive-callsites=true \ ; RUN: %s -S 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ ; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS @@ -56,12 +57,23 @@ ; RUN: --implicit-check-not="marked with memprof allocation attribute cold" \ ; RUN: --check-prefix=ALL +;; Check the default behavior (disabled recursive callsites). +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: %s -S 2>&1 | FileCheck %s \ +; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ +; RUN: --implicit-check-not="created clone" \ +; RUN: --implicit-check-not="marked with memprof allocation attribute cold" \ +; RUN: --check-prefix=ALL + ;; Skipping recursive contexts should prevent spurious call to cloned version of ;; B from the context starting at memprof_recursive.cc:19:13, which is actually ;; recursive (until that support is added). ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ +; RUN: -memprof-allow-recursive-callsites=true \ ; RUN: -memprof-allow-recursive-contexts=false \ ; RUN: %s -S 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ From 218f15cd1eee22933e505d7093ec2fe38a36ef3b Mon Sep 17 00:00:00 2001 From: Chris B Date: Thu, 9 Jan 2025 14:10:44 -0600 Subject: [PATCH 52/79] Add pre-merge workflow for HLSL testing (#122184) This adds a workflow for running HLSL tests on PRs that modify HLSL and DirectX code. The tests enabled here are the LLVM & Clang tests and the Offload execution tests: https://github.com/llvm-beanz/offload-test-suite/ --- .github/workflows/hlsl-matrix.yaml | 30 ++++++++++ .github/workflows/hlsl-test-all.yaml | 87 ++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 .github/workflows/hlsl-matrix.yaml create mode 100644 .github/workflows/hlsl-test-all.yaml diff --git a/.github/workflows/hlsl-matrix.yaml b/.github/workflows/hlsl-matrix.yaml new file mode 100644 index 0000000000000..c63a32acd2b3e --- /dev/null +++ b/.github/workflows/hlsl-matrix.yaml @@ -0,0 +1,30 @@ +name: HLSL Tests + +permissions: + contents: read + +on: + workflow_dispatch: + pull_request: + branches: + - main + paths: + - llvm/**/DirectX/** + - .github/workflows/hlsl* + - clang/*HLSL*/**/* + - clang/**/*HLSL* + - llvm/**/Frontend/HLSL/**/* + +jobs: + HLSL-Tests: + strategy: + fail-fast: false + matrix: + runs-on: + - hlsl-macos + + uses: ./.github/workflows/hlsl-test-all.yaml + with: + SKU: hlsl-macos + TestTarget: check-hlsl-clang-mtl # TODO: This target changes based on SKU + LLVM-ref: ${{ github.ref }} diff --git a/.github/workflows/hlsl-test-all.yaml b/.github/workflows/hlsl-test-all.yaml new file mode 100644 index 0000000000000..93a1c6d2662d4 --- /dev/null +++ b/.github/workflows/hlsl-test-all.yaml @@ -0,0 +1,87 @@ +name: HLSL Test + +permissions: + contents: read + +on: + workflow_call: + inputs: + OffloadTest-branch: + description: 'Test Suite Branch' + required: false + default: 'main' + type: string + LLVM-ref: + description: 'LLVM Branch' + required: false + default: 'main' + type: string + SKU: + required: true + type: string + TestTarget: + required: false + default: 'check-hlsl' + type: string + +jobs: + build: + runs-on: ${{ inputs.SKU }} + steps: + - name: Checkout DXC + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + repository: Microsoft/DirectXShaderCompiler + ref: main + path: DXC + submodules: true + - name: Checkout LLVM + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + ref: ${{ inputs.LLVM-branch }} + path: llvm-project + - name: Checkout OffloadTest + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + repository: llvm-beanz/offload-test-suite + ref: main + path: OffloadTest + - name: Checkout Golden Images + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + repository: llvm-beanz/offload-golden-images + ref: main + path: golden-images + - name: Setup Windows + if: runner.os == 'Windows' + uses: llvm/actions/setup-windows@main + with: + arch: amd64 + - name: Build DXC + run: | + cd DXC + mkdir build + cd build + cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -C ${{ github.workspace }}/DXC/cmake/caches/PredefinedParams.cmake -C ${{ github.workspace }}/OffloadTest/cmake/caches/sccache.cmake -DHLSL_DISABLE_SOURCE_GENERATION=On ${{ github.workspace }}/DXC/ + ninja dxv llvm-dis + - name: Build LLVM + run: | + cd llvm-project + mkdir build + cd build + cmake -G Ninja -DDXIL_DIS=${{ github.workspace }}/DXC/build/bin/llvm-dis -DLLVM_INCLUDE_DXIL_TESTS=On -DCMAKE_BUILD_TYPE=Release -C ${{ github.workspace }}/llvm-project/clang/cmake/caches/HLSL.cmake -C ${{ github.workspace }}/OffloadTest/cmake/caches/sccache.cmake -DDXC_DIR=${{ github.workspace }}/DXC/build/bin -DLLVM_EXTERNAL_OFFLOADTEST_SOURCE_DIR=${{ github.workspace }}/OffloadTest -DLLVM_EXTERNAL_PROJECTS="OffloadTest" -DLLVM_LIT_ARGS="--xunit-xml-output=testresults.xunit.xml -v" -DGOLDENIMAGE_DIR=${{ github.workspace }}/golden-images ${{ github.workspace }}/llvm-project/llvm/ + ninja hlsl-test-depends llvm-test-depends clang-test-depends + - name: Run HLSL Tests + run: | + cd llvm-project + cd build + ninja check-llvm + ninja check-clang + ninja check-hlsl-unit + ninja ${{ inputs.TestTarget }} + - name: Publish Test Results + uses: EnricoMi/publish-unit-test-result-action/macos@170bf24d20d201b842d7a52403b73ed297e6645b # v2 + if: always() && runner.os == 'macOS' + with: + comment_mode: off + files: llvm-project/build/**/testresults.xunit.xml From 9d5299eb61a64cd4df5fefa0299b0cf8d917978f Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 9 Jan 2025 20:18:34 +0000 Subject: [PATCH 53/79] VT/test: pre-commit tests to enable samesign optz (#120257) Pre-commit some tests in preparation to teach ValueTracking's implied-cond about samesign. --- .../implied-condition-samesign.ll | 286 ++++++++++++++++++ 1 file changed, 286 insertions(+) create mode 100644 llvm/test/Analysis/ValueTracking/implied-condition-samesign.ll diff --git a/llvm/test/Analysis/ValueTracking/implied-condition-samesign.ll b/llvm/test/Analysis/ValueTracking/implied-condition-samesign.ll new file mode 100644 index 0000000000000..0ea69c2cd4d47 --- /dev/null +++ b/llvm/test/Analysis/ValueTracking/implied-condition-samesign.ll @@ -0,0 +1,286 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=instsimplify -S %s | FileCheck %s + +define i1 @incr_sle(i32 %i, i32 %len) { +; CHECK-LABEL: define i1 @incr_sle( +; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[I_INCR:%.*]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[I_GT_LEN:%.*]] = icmp samesign ugt i32 [[I]], [[LEN]] +; CHECK-NEXT: [[I_INCR_SGT_LEN:%.*]] = icmp sgt i32 [[I_INCR]], [[LEN]] +; CHECK-NEXT: [[RES:%.*]] = icmp sle i1 [[I_INCR_SGT_LEN]], [[I_GT_LEN]] +; CHECK-NEXT: ret i1 [[RES]] +; + %i.incr = add nsw nuw i32 %i, 1 + %i.gt.len = icmp samesign ugt i32 %i, %len + %i.incr.sgt.len = icmp sgt i32 %i.incr, %len + %res = icmp sle i1 %i.incr.sgt.len, %i.gt.len + ret i1 %res +} + +define i1 @incr_sle_no_nsw_nuw(i32 %i, i32 %len) { +; CHECK-LABEL: define i1 @incr_sle_no_nsw_nuw( +; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[I_INCR:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I_GT_LEN:%.*]] = icmp samesign ugt i32 [[I]], [[LEN]] +; CHECK-NEXT: [[I_INCR_SGT_LEN:%.*]] = icmp sgt i32 [[I_INCR]], [[LEN]] +; CHECK-NEXT: [[RES:%.*]] = icmp sle i1 [[I_INCR_SGT_LEN]], [[I_GT_LEN]] +; CHECK-NEXT: ret i1 [[RES]] +; + %i.incr = add i32 %i, 1 + %i.gt.len = icmp samesign ugt i32 %i, %len + %i.incr.sgt.len = icmp sgt i32 %i.incr, %len + %res = icmp sle i1 %i.incr.sgt.len, %i.gt.len + ret i1 %res +} + +define i1 @incr_sge(i32 %i, i32 %len) { +; CHECK-LABEL: define i1 @incr_sge( +; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[I_INCR:%.*]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[I_LT_LEN:%.*]] = icmp samesign ult i32 [[I]], [[LEN]] +; CHECK-NEXT: [[I_INCR_SLT_LEN:%.*]] = icmp slt i32 [[I_INCR]], [[LEN]] +; CHECK-NEXT: [[RES:%.*]] = icmp sge i1 [[I_INCR_SLT_LEN]], [[I_LT_LEN]] +; CHECK-NEXT: ret i1 [[RES]] +; + %i.incr = add nsw nuw i32 %i, 1 + %i.lt.len = icmp samesign ult i32 %i, %len + %i.incr.slt.len = icmp slt i32 %i.incr, %len + %res = icmp sge i1 %i.incr.slt.len, %i.lt.len + ret i1 %res +} + +define i1 @incr_sge_no_nsw_nuw(i32 %i, i32 %len) { +; CHECK-LABEL: define i1 @incr_sge_no_nsw_nuw( +; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[I_INCR:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I_LT_LEN:%.*]] = icmp samesign ult i32 [[I]], [[LEN]] +; CHECK-NEXT: [[I_INCR_SLT_LEN:%.*]] = icmp slt i32 [[I_INCR]], [[LEN]] +; CHECK-NEXT: [[RES:%.*]] = icmp sge i1 [[I_INCR_SLT_LEN]], [[I_LT_LEN]] +; CHECK-NEXT: ret i1 [[RES]] +; + %i.incr = add i32 %i, 1 + %i.lt.len = icmp samesign ult i32 %i, %len + %i.incr.slt.len = icmp slt i32 %i.incr, %len + %res = icmp sge i1 %i.incr.slt.len, %i.lt.len + ret i1 %res +} + +define i1 @incr_ule(i32 %i, i32 %len) { +; CHECK-LABEL: define i1 @incr_ule( +; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[I_INCR:%.*]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[I_GT_LEN:%.*]] = icmp samesign ugt i32 [[I]], [[LEN]] +; CHECK-NEXT: [[I_INCR_SGT_LEN:%.*]] = icmp sgt i32 [[I_INCR]], [[LEN]] +; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[I_GT_LEN]], [[I_INCR_SGT_LEN]] +; CHECK-NEXT: ret i1 [[RES]] +; + %i.incr = add nsw nuw i32 %i, 1 + %i.gt.len = icmp samesign ugt i32 %i, %len + %i.incr.sgt.len = icmp sgt i32 %i.incr, %len + %res = icmp ule i1 %i.gt.len, %i.incr.sgt.len + ret i1 %res +} + +define i1 @incr_ule_no_nsw_nuw(i32 %i, i32 %len) { +; CHECK-LABEL: define i1 @incr_ule_no_nsw_nuw( +; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[I_INCR:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I_GT_LEN:%.*]] = icmp samesign ugt i32 [[I]], [[LEN]] +; CHECK-NEXT: [[I_INCR_SGT_LEN:%.*]] = icmp sgt i32 [[I_INCR]], [[LEN]] +; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[I_GT_LEN]], [[I_INCR_SGT_LEN]] +; CHECK-NEXT: ret i1 [[RES]] +; + %i.incr = add i32 %i, 1 + %i.gt.len = icmp samesign ugt i32 %i, %len + %i.incr.sgt.len = icmp sgt i32 %i.incr, %len + %res = icmp ule i1 %i.gt.len, %i.incr.sgt.len + ret i1 %res +} + +define i1 @incr_uge(i32 %i, i32 %len) { +; CHECK-LABEL: define i1 @incr_uge( +; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[I_INCR:%.*]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[I_LT_LEN:%.*]] = icmp samesign ult i32 [[I]], [[LEN]] +; CHECK-NEXT: [[I_INCR_SLT_LEN:%.*]] = icmp slt i32 [[I_INCR]], [[LEN]] +; CHECK-NEXT: [[RES:%.*]] = icmp uge i1 [[I_LT_LEN]], [[I_INCR_SLT_LEN]] +; CHECK-NEXT: ret i1 [[RES]] +; + %i.incr = add nsw nuw i32 %i, 1 + %i.lt.len = icmp samesign ult i32 %i, %len + %i.incr.slt.len = icmp slt i32 %i.incr, %len + %res = icmp uge i1 %i.lt.len, %i.incr.slt.len + ret i1 %res +} + +define i1 @incr_uge_no_nsw_nuw(i32 %i, i32 %len) { +; CHECK-LABEL: define i1 @incr_uge_no_nsw_nuw( +; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[I_INCR:%.*]] = add i32 [[I]], 1 +; CHECK-NEXT: [[I_LT_LEN:%.*]] = icmp samesign ult i32 [[I]], [[LEN]] +; CHECK-NEXT: [[I_INCR_SLT_LEN:%.*]] = icmp slt i32 [[I_INCR]], [[LEN]] +; CHECK-NEXT: [[RES:%.*]] = icmp uge i1 [[I_LT_LEN]], [[I_INCR_SLT_LEN]] +; CHECK-NEXT: ret i1 [[RES]] +; + %i.incr = add i32 %i, 1 + %i.lt.len = icmp samesign ult i32 %i, %len + %i.incr.slt.len = icmp slt i32 %i.incr, %len + %res = icmp uge i1 %i.lt.len, %i.incr.slt.len + ret i1 %res +} + +define i1 @sgt_implies_ge_via_assume(i32 %i, i32 %j) { +; CHECK-LABEL: define i1 @sgt_implies_ge_via_assume( +; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]]) { +; CHECK-NEXT: [[I_SGT_J:%.*]] = icmp sgt i32 [[I]], [[J]] +; CHECK-NEXT: call void @llvm.assume(i1 [[I_SGT_J]]) +; CHECK-NEXT: [[I_GE_J:%.*]] = icmp samesign uge i32 [[I]], [[J]] +; CHECK-NEXT: ret i1 [[I_GE_J]] +; + %i.sgt.j = icmp sgt i32 %i, %j + call void @llvm.assume(i1 %i.sgt.j) + %i.ge.j = icmp samesign uge i32 %i, %j + ret i1 %i.ge.j +} + +define i32 @gt_implies_sge_dominating(i32 %a, i32 %len) { +; CHECK-LABEL: define i32 @gt_implies_sge_dominating( +; CHECK-SAME: i32 [[A:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[A_GT_LEN:%.*]] = icmp samesign ugt i32 [[A]], [[LEN]] +; CHECK-NEXT: br i1 [[A_GT_LEN]], label %[[TAKEN:.*]], label %[[END:.*]] +; CHECK: [[TAKEN]]: +; CHECK-NEXT: [[A_SGE_LEN:%.*]] = icmp sge i32 [[A]], [[LEN]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[A_SGE_LEN]], i32 30, i32 0 +; CHECK-NEXT: ret i32 [[RES]] +; CHECK: [[END]]: +; CHECK-NEXT: ret i32 -1 +; +entry: + %a.gt.len = icmp samesign ugt i32 %a, %len + br i1 %a.gt.len, label %taken, label %end + +taken: + %a.sge.len = icmp sge i32 %a, %len + %res = select i1 %a.sge.len, i32 30, i32 0 + ret i32 %res + +end: + ret i32 -1 +} + +define i32 @gt_implies_sge_dominating_cr(i32 %a, i32 %len) { +; CHECK-LABEL: define i32 @gt_implies_sge_dominating_cr( +; CHECK-SAME: i32 [[A:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[A_GT_20:%.*]] = icmp samesign ugt i32 [[A]], 20 +; CHECK-NEXT: br i1 [[A_GT_20]], label %[[TAKEN:.*]], label %[[END:.*]] +; CHECK: [[TAKEN]]: +; CHECK-NEXT: [[A_SGE_10:%.*]] = icmp sge i32 [[A]], 10 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[A_SGE_10]], i32 30, i32 0 +; CHECK-NEXT: ret i32 [[RES]] +; CHECK: [[END]]: +; CHECK-NEXT: ret i32 -1 +; +entry: + %a.gt.20 = icmp samesign ugt i32 %a, 20 + br i1 %a.gt.20, label %taken, label %end + +taken: + %a.sge.10 = icmp sge i32 %a, 10 + %res = select i1 %a.sge.10, i32 30, i32 0 + ret i32 %res + +end: + ret i32 -1 +} + +define i32 @sgt_implies_ge_dominating_cr(i32 %a, i32 %len) { +; CHECK-LABEL: define i32 @sgt_implies_ge_dominating_cr( +; CHECK-SAME: i32 [[A:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[A_SGT_MINUS_10:%.*]] = icmp sgt i32 [[A]], -10 +; CHECK-NEXT: br i1 [[A_SGT_MINUS_10]], label %[[TAKEN:.*]], label %[[END:.*]] +; CHECK: [[TAKEN]]: +; CHECK-NEXT: [[A_GE_MINUS_20:%.*]] = icmp samesign uge i32 [[A]], -20 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[A_GE_MINUS_20]], i32 30, i32 0 +; CHECK-NEXT: ret i32 [[RES]] +; CHECK: [[END]]: +; CHECK-NEXT: ret i32 -1 +; +entry: + %a.sgt.minus.10 = icmp sgt i32 %a, -10 + br i1 %a.sgt.minus.10, label %taken, label %end + +taken: + %a.ge.minus.20 = icmp samesign uge i32 %a, -20 + %res = select i1 %a.ge.minus.20, i32 30, i32 0 + ret i32 %res + +end: + ret i32 -1 +} + +define i32 @gt_sub_nsw(i32 %x, i32 %y) { +; CHECK-LABEL: define i32 @gt_sub_nsw( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[X_GT_Y:%.*]] = icmp samesign ugt i32 [[X]], [[Y]] +; CHECK-NEXT: br i1 [[X_GT_Y]], label %[[TAKEN:.*]], label %[[END:.*]] +; CHECK: [[TAKEN]]: +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[SUB]], 1 +; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[SUB]], -1 +; CHECK-NEXT: [[ABSCOND:%.*]] = icmp samesign ult i32 [[SUB]], -1 +; CHECK-NEXT: [[ABS:%.*]] = select i1 [[ABSCOND]], i32 [[NEG]], i32 [[ADD]] +; CHECK-NEXT: ret i32 [[ABS]] +; CHECK: [[END]]: +; CHECK-NEXT: ret i32 0 +; +entry: + %x.gt.y = icmp samesign ugt i32 %x, %y + br i1 %x.gt.y, label %taken, label %end + +taken: + %sub = sub nsw i32 %x, %y + %add = add nsw i32 %sub, 1 + %neg = xor i32 %sub, -1 + %abscond = icmp samesign ult i32 %sub, -1 + %abs = select i1 %abscond, i32 %neg, i32 %add + ret i32 %abs + +end: + ret i32 0 +} + +define i32 @ge_sub_nsw(i32 %x, i32 %y) { +; CHECK-LABEL: define i32 @ge_sub_nsw( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[X_GE_Y:%.*]] = icmp samesign uge i32 [[X]], [[Y]] +; CHECK-NEXT: br i1 [[X_GE_Y]], label %[[TAKEN:.*]], label %[[END:.*]] +; CHECK: [[TAKEN]]: +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X]], [[Y]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[SUB]], 1 +; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[SUB]], -1 +; CHECK-NEXT: [[ABSCOND:%.*]] = icmp samesign ult i32 [[SUB]], -1 +; CHECK-NEXT: [[ABS:%.*]] = select i1 [[ABSCOND]], i32 [[NEG]], i32 [[ADD]] +; CHECK-NEXT: ret i32 [[ABS]] +; CHECK: [[END]]: +; CHECK-NEXT: ret i32 0 +; +entry: + %x.ge.y = icmp samesign uge i32 %x, %y + br i1 %x.ge.y, label %taken, label %end + +taken: + %sub = sub nsw i32 %x, %y + %add = add nsw i32 %sub, 1 + %neg = xor i32 %sub, -1 + %abscond = icmp samesign ult i32 %sub, -1 + %abs = select i1 %abscond, i32 %neg, i32 %add + ret i32 %abs + +end: + ret i32 0 +} From f791a4f19f6c99feccfe59e0724d9215bee985cf Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 9 Jan 2025 21:38:29 +0100 Subject: [PATCH 54/79] [bazel] Add missing dependency for cbcb7ad32e6faca1f4c0f2f436e6076774104e17 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index e823af2f14712..274cec0c187a4 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -10571,6 +10571,7 @@ cc_library( ":OpenACCTypeInterfacesIncGen", ":OpenACCTypesIncGen", ":SideEffectInterfaces", + ":Support", ":TransformUtils", "//llvm:Support", ], From d797d94185cfb3eadaef6103b0d2b2f312e4f432 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 9 Jan 2025 21:39:14 +0100 Subject: [PATCH 55/79] [bazel] Port 0aa831e0edb1c1deabb96ce2435667cc82bac79b --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 2 ++ 2 files changed, 3 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 274cec0c187a4..5c2a77ca67fd4 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5717,6 +5717,7 @@ cc_library( ":SCFDialect", ":SideEffectInterfaces", ":Support", + ":ValueBoundsOpInterface", "//llvm:Core", "//llvm:Support", ], diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 7d51a3829e912..873fb2d18bfb2 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -548,6 +548,7 @@ cc_library( "//llvm:Support", "//mlir:ConvertToSPIRV", "//mlir:FuncDialect", + "//mlir:GPUDialect", "//mlir:GPUToSPIRV", "//mlir:GPUTransforms", "//mlir:IR", @@ -695,6 +696,7 @@ cc_library( "//mlir:ArithTransforms", "//mlir:DialectUtils", "//mlir:FuncDialect", + "//mlir:FunctionInterfaces", "//mlir:IR", "//mlir:MemRefDialect", "//mlir:Pass", From 8ea8e7f52908a831ab44ffca1e0c8fe207a409c8 Mon Sep 17 00:00:00 2001 From: Tom Honermann Date: Thu, 9 Jan 2025 15:42:29 -0500 Subject: [PATCH 56/79] [SYCL] Basic diagnostics for the sycl_kernel_entry_point attribute. (#120327) The `sycl_kernel_entry_point` attribute is used to declare a function that defines a pattern for an offload kernel entry point. The attribute requires a single type argument that specifies a class type that meets the requirements for a SYCL kernel name as described in section 5.2, "Naming of kernels", of the SYCL 2020 specification. A unique kernel name type is required for each function declared with the attribute. The attribute may not first appear on a declaration that follows a definition of the function. The function is required to have a non-deduced `void` return type. The function must not be a non-static member function, be deleted or defaulted, be declared with the `constexpr` or `consteval` specifiers, be declared with the `[[noreturn]]` attribute, be a coroutine, or accept variadic arguments. Diagnostics are not yet provided for the following: - Use of a type as a kernel name that does not satisfy the forward declarability requirements specified in section 5.2, "Naming of kernels", of the SYCL 2020 specification. - Use of a type as a parameter of the attributed function that does not satisfy the kernel parameter requirements specified in section 4.12.4, "Rules for parameter passing to kernels", of the SYCL 2020 specification (each such function parameter constitutes a kernel parameter). - Use of language features that are not permitted in device functions as specified in section 5.4, "Language restrictions for device functions", of the SYCL 2020 specification. There are several issues noted by various FIXME comments. - The diagnostic generated for kernel name conflicts needs additional work to better detail the relevant source locations; such as the location of each declaration as well as the original source of each kernel name. - A number of the tests illustrate spurious errors being produced due to attributes that appertain to function templates being instantiated too early (during overload resolution as opposed to after an overload is selected). Included changes allow the `SYCLKernelEntryPointAttr` attribute to be marked as invalid if a `sycl_kernel_entry_point` attribute is used incorrectly. This is intended to prevent trying to emit an offload kernel entry point without having to mark the associated function as invalid since doing so would affect overload resolution; which this attribute should not do. Unfortunately, Clang eagerly instantiates attributes that appertain to functions with the result that errors might be issued for function declarations that are never selected by overload resolution. Tests have been added to demonstrate this. Further work will be needed to address these issues (for this and other attributes). --- clang/include/clang/AST/ASTContext.h | 10 + clang/include/clang/Basic/Attr.td | 13 +- clang/include/clang/Basic/AttrDocs.td | 2 +- clang/include/clang/Basic/DiagnosticGroups.td | 1 + .../clang/Basic/DiagnosticSemaKinds.td | 27 ++ clang/include/clang/Sema/SemaSYCL.h | 2 + clang/lib/AST/ASTContext.cpp | 13 + clang/lib/Sema/SemaDecl.cpp | 36 +- clang/lib/Sema/SemaLambda.cpp | 5 + clang/lib/Sema/SemaSYCL.cpp | 156 ++++++++ clang/lib/Serialization/ASTReaderDecl.cpp | 13 +- .../ast-dump-sycl-kernel-entry-point.cpp | 23 +- ...-kernel-entry-point-attr-appertainment.cpp | 352 ++++++++++++++++++ .../sycl-kernel-entry-point-attr-grammar.cpp | 15 - ...el-entry-point-attr-kernel-name-module.cpp | 104 ++++++ ...ernel-entry-point-attr-kernel-name-pch.cpp | 36 ++ ...cl-kernel-entry-point-attr-kernel-name.cpp | 118 ++++++ .../sycl-kernel-entry-point-attr-sfinae.cpp | 65 ++++ 18 files changed, 966 insertions(+), 25 deletions(-) create mode 100644 clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp create mode 100644 clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp create mode 100644 clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp create mode 100644 clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp create mode 100644 clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 1e89a6805ce9c..0e07c5d6ce8fb 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -3360,6 +3360,16 @@ class ASTContext : public RefCountedBase { /// this function. void registerSYCLEntryPointFunction(FunctionDecl *FD); + /// Given a type used as a SYCL kernel name, returns a reference to the + /// metadata generated from the corresponding SYCL kernel entry point. + /// Aborts if the provided type is not a registered SYCL kernel name. + const SYCLKernelInfo &getSYCLKernelInfo(QualType T) const; + + /// Returns a pointer to the metadata generated from the corresponding + /// SYCLkernel entry point if the provided type corresponds to a registered + /// SYCL kernel name. Returns a null pointer otherwise. + const SYCLKernelInfo *findSYCLKernelInfo(QualType T) const; + //===--------------------------------------------------------------------===// // Statistics //===--------------------------------------------------------------------===// diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index e51a74655dd5e..5039c20d8b73b 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1516,11 +1516,22 @@ def SYCLKernel : InheritableAttr { def SYCLKernelEntryPoint : InheritableAttr { let Spellings = [Clang<"sycl_kernel_entry_point">]; - let Args = [TypeArgument<"KernelName">]; + let Args = [ + // KernelName is required and specifies the kernel name type. + TypeArgument<"KernelName">, + // InvalidAttr is a fake argument used to track whether the + // semantic requirements of the attribute have been satisified. + // A fake argument is used to enable serialization support. + DefaultBoolArgument<"Invalid", /*default=*/0, /*fake=*/1> + ]; let Subjects = SubjectList<[Function], ErrorDiag>; let TemplateDependent = 1; let LangOpts = [SYCLHost, SYCLDevice]; let Documentation = [SYCLKernelEntryPointDocs]; + let AdditionalMembers = [{ + void setInvalidAttr() { invalid = true; } + bool isInvalidAttr() const { return invalid; } + }]; } def SYCLSpecialClass: InheritableAttr { diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index b8d702e41aa0b..506fe38eb882b 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -475,7 +475,7 @@ not first appear on a declaration that follows a definition of the function. The attribute only appertains to functions and only those that meet the following requirements. -* Has a ``void`` return type. +* Has a non-deduced ``void`` return type. * Is not a non-static member function, constructor, or destructor. * Is not a C variadic function. * Is not a coroutine. diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 3ac490d30371b..594e99a19b64d 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -648,6 +648,7 @@ def PoundPragmaMessage : DiagGroup<"#pragma-messages">, def : DiagGroup<"redundant-decls">; def RedeclaredClassMember : DiagGroup<"redeclared-class-member">; def GNURedeclaredEnum : DiagGroup<"gnu-redeclared-enum">; +def RedundantAttribute : DiagGroup<"redundant-attribute">; def RedundantMove : DiagGroup<"redundant-move">; def Register : DiagGroup<"register", [DeprecatedRegister]>; def ReturnTypeCLinkage : DiagGroup<"return-type-c-linkage">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ab2d6237c1cab..d4e897868f1a9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12429,6 +12429,33 @@ def err_sycl_special_type_num_init_method : Error< "types with 'sycl_special_class' attribute must have one and only one '__init' " "method defined">; +// SYCL kernel entry point diagnostics +def err_sycl_entry_point_invalid : Error< + "'sycl_kernel_entry_point' attribute cannot be applied to a" + " %select{non-static member function|variadic function|deleted function|" + "defaulted function|constexpr function|consteval function|" + "function declared with the 'noreturn' attribute|coroutine}0">; +def err_sycl_entry_point_invalid_redeclaration : Error< + "'sycl_kernel_entry_point' kernel name argument does not match prior" + " declaration%diff{: $ vs $|}0,1">; +def err_sycl_kernel_name_conflict : Error< + "'sycl_kernel_entry_point' kernel name argument conflicts with a previous" + " declaration">; +def warn_sycl_kernel_name_not_a_class_type : Warning< + "%0 is not a valid SYCL kernel name type; a non-union class type is required">, + InGroup>, DefaultError; +def warn_sycl_entry_point_redundant_declaration : Warning< + "redundant 'sycl_kernel_entry_point' attribute">, InGroup; +def err_sycl_entry_point_after_definition : Error< + "'sycl_kernel_entry_point' attribute cannot be added to a function after the" + " function is defined">; +def err_sycl_entry_point_return_type : Error< + "'sycl_kernel_entry_point' attribute only applies to functions with a" + " 'void' return type">; +def err_sycl_entry_point_deduced_return_type : Error< + "'sycl_kernel_entry_point' attribute only applies to functions with a" + " non-deduced 'void' return type">; + def warn_cuda_maxclusterrank_sm_90 : Warning< "maxclusterrank requires sm_90 or higher, CUDA arch provided: %0, ignoring " "%1 attribute">, InGroup; diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h index c9f3358124eda..5bb0de40c886c 100644 --- a/clang/include/clang/Sema/SemaSYCL.h +++ b/clang/include/clang/Sema/SemaSYCL.h @@ -63,6 +63,8 @@ class SemaSYCL : public SemaBase { void handleKernelAttr(Decl *D, const ParsedAttr &AL); void handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL); + + void CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD); }; } // namespace clang diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index b10513f49a8d1..8f04b58841964 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -14502,6 +14502,19 @@ void ASTContext::registerSYCLEntryPointFunction(FunctionDecl *FD) { std::make_pair(KernelNameType, BuildSYCLKernelInfo(KernelNameType, FD))); } +const SYCLKernelInfo &ASTContext::getSYCLKernelInfo(QualType T) const { + CanQualType KernelNameType = getCanonicalType(T); + return SYCLKernels.at(KernelNameType); +} + +const SYCLKernelInfo *ASTContext::findSYCLKernelInfo(QualType T) const { + CanQualType KernelNameType = getCanonicalType(T); + auto IT = SYCLKernels.find(KernelNameType); + if (IT != SYCLKernels.end()) + return &IT->second; + return nullptr; +} + OMPTraitInfo &ASTContext::getNewOMPTraitInfo() { OMPTraitInfoVector.emplace_back(new OMPTraitInfo()); return *OMPTraitInfoVector.back(); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 4001c4d263f1d..75920052c4f0c 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -52,6 +52,7 @@ #include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/SemaPPC.h" #include "clang/Sema/SemaRISCV.h" +#include "clang/Sema/SemaSYCL.h" #include "clang/Sema/SemaSwift.h" #include "clang/Sema/SemaWasm.h" #include "clang/Sema/Template.h" @@ -2923,7 +2924,7 @@ static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) { AttrVec &NewAttributes = New->getAttrs(); for (unsigned I = 0, E = NewAttributes.size(); I != E;) { - const Attr *NewAttribute = NewAttributes[I]; + Attr *NewAttribute = NewAttributes[I]; if (isa(NewAttribute) || isa(NewAttribute)) { if (FunctionDecl *FD = dyn_cast(New)) { @@ -3018,6 +3019,16 @@ static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) { // declarations after definitions. ++I; continue; + } else if (isa(NewAttribute)) { + // Elevate latent uses of the sycl_kernel_entry_point attribute to an + // error since the definition will have already been created without + // the semantic effects of the attribute having been applied. + S.Diag(NewAttribute->getLocation(), + diag::err_sycl_entry_point_after_definition); + S.Diag(Def->getLocation(), diag::note_previous_definition); + cast(NewAttribute)->setInvalidAttr(); + ++I; + continue; } S.Diag(NewAttribute->getLocation(), @@ -12142,8 +12153,8 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, if (LangOpts.OpenMP) OpenMP().ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(NewFD); - if (LangOpts.isSYCL() && NewFD->hasAttr()) - getASTContext().registerSYCLEntryPointFunction(NewFD); + if (NewFD->hasAttr()) + SYCL().CheckSYCLEntryPointFunctionDecl(NewFD); // Semantic checking for this function declaration (in isolation). @@ -15975,6 +15986,25 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, CheckCoroutineWrapper(FD); } + // Diagnose invalid SYCL kernel entry point function declarations. + if (FD && !FD->isInvalidDecl() && FD->hasAttr()) { + SYCLKernelEntryPointAttr *SKEPAttr = + FD->getAttr(); + if (FD->isDefaulted()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << /*defaulted function*/ 3; + SKEPAttr->setInvalidAttr(); + } else if (FD->isDeleted()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << /*deleted function*/ 2; + SKEPAttr->setInvalidAttr(); + } else if (FSI->isCoroutine()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << /*coroutine*/ 7; + SKEPAttr->setInvalidAttr(); + } + } + { // Do not call PopExpressionEvaluationContext() if it is a lambda because // one is already popped when finishing the lambda in BuildLambdaExpr(). diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index a67c0b2b367d1..f2c3a816b3b5d 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -24,6 +24,7 @@ #include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaOpenMP.h" +#include "clang/Sema/SemaSYCL.h" #include "clang/Sema/Template.h" #include "llvm/ADT/STLExtras.h" #include @@ -1948,6 +1949,10 @@ ExprResult Sema::BuildCaptureInit(const Capture &Cap, ExprResult Sema::ActOnLambdaExpr(SourceLocation StartLoc, Stmt *Body) { LambdaScopeInfo LSI = *cast(FunctionScopes.back()); + + if (LSI.CallOperator->hasAttr()) + SYCL().CheckSYCLEntryPointFunctionDecl(LSI.CallOperator); + ActOnFinishFunctionBody(LSI.CallOperator, Body); return BuildLambdaExpr(StartLoc, Body->getEndLoc(), &LSI); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index d4fddeb01d0fc..ce53990fdcb18 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -10,7 +10,9 @@ #include "clang/Sema/SemaSYCL.h" #include "clang/AST/Mangle.h" +#include "clang/AST/SYCLKernelInfo.h" #include "clang/AST/TypeOrdering.h" +#include "clang/Basic/Diagnostic.h" #include "clang/Sema/Attr.h" #include "clang/Sema/ParsedAttr.h" #include "clang/Sema/Sema.h" @@ -206,3 +208,157 @@ void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) { D->addAttr(::new (SemaRef.Context) SYCLKernelEntryPointAttr(SemaRef.Context, AL, TSI)); } + +// Given a potentially qualified type, SourceLocationForUserDeclaredType() +// returns the source location of the canonical declaration of the unqualified +// desugared user declared type, if any. For non-user declared types, an +// invalid source location is returned. The intended usage of this function +// is to identify an appropriate source location, if any, for a +// "entity declared here" diagnostic note. +static SourceLocation SourceLocationForUserDeclaredType(QualType QT) { + SourceLocation Loc; + const Type *T = QT->getUnqualifiedDesugaredType(); + if (const TagType *TT = dyn_cast(T)) + Loc = TT->getDecl()->getLocation(); + else if (const ObjCInterfaceType *ObjCIT = dyn_cast(T)) + Loc = ObjCIT->getDecl()->getLocation(); + return Loc; +} + +static bool CheckSYCLKernelName(Sema &S, SourceLocation Loc, + QualType KernelName) { + assert(!KernelName->isDependentType()); + + if (!KernelName->isStructureOrClassType()) { + // SYCL 2020 section 5.2, "Naming of kernels", only requires that the + // kernel name be a C++ typename. However, the definition of "kernel name" + // in the glossary states that a kernel name is a class type. Neither + // section explicitly states whether the kernel name type can be + // cv-qualified. For now, kernel name types are required to be class types + // and that they may be cv-qualified. The following issue requests + // clarification from the SYCL WG. + // https://github.com/KhronosGroup/SYCL-Docs/issues/568 + S.Diag(Loc, diag::warn_sycl_kernel_name_not_a_class_type) << KernelName; + SourceLocation DeclTypeLoc = SourceLocationForUserDeclaredType(KernelName); + if (DeclTypeLoc.isValid()) + S.Diag(DeclTypeLoc, diag::note_entity_declared_at) << KernelName; + return true; + } + + return false; +} + +void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) { + // Ensure that all attributes present on the declaration are consistent + // and warn about any redundant ones. + SYCLKernelEntryPointAttr *SKEPAttr = nullptr; + for (auto *SAI : FD->specific_attrs()) { + if (!SKEPAttr) { + SKEPAttr = SAI; + continue; + } + if (!getASTContext().hasSameType(SAI->getKernelName(), + SKEPAttr->getKernelName())) { + Diag(SAI->getLocation(), diag::err_sycl_entry_point_invalid_redeclaration) + << SAI->getKernelName() << SKEPAttr->getKernelName(); + Diag(SKEPAttr->getLocation(), diag::note_previous_attribute); + SAI->setInvalidAttr(); + } else { + Diag(SAI->getLocation(), + diag::warn_sycl_entry_point_redundant_declaration); + Diag(SKEPAttr->getLocation(), diag::note_previous_attribute); + } + } + assert(SKEPAttr && "Missing sycl_kernel_entry_point attribute"); + + // Ensure the kernel name type is valid. + if (!SKEPAttr->getKernelName()->isDependentType() && + CheckSYCLKernelName(SemaRef, SKEPAttr->getLocation(), + SKEPAttr->getKernelName())) + SKEPAttr->setInvalidAttr(); + + // Ensure that an attribute present on the previous declaration + // matches the one on this declaration. + FunctionDecl *PrevFD = FD->getPreviousDecl(); + if (PrevFD && !PrevFD->isInvalidDecl()) { + const auto *PrevSKEPAttr = PrevFD->getAttr(); + if (PrevSKEPAttr && !PrevSKEPAttr->isInvalidAttr()) { + if (!getASTContext().hasSameType(SKEPAttr->getKernelName(), + PrevSKEPAttr->getKernelName())) { + Diag(SKEPAttr->getLocation(), + diag::err_sycl_entry_point_invalid_redeclaration) + << SKEPAttr->getKernelName() << PrevSKEPAttr->getKernelName(); + Diag(PrevSKEPAttr->getLocation(), diag::note_previous_decl) << PrevFD; + SKEPAttr->setInvalidAttr(); + } + } + } + + if (const auto *MD = dyn_cast(FD)) { + if (!MD->isStatic()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << /*non-static member function*/ 0; + SKEPAttr->setInvalidAttr(); + } + } + + if (FD->isVariadic()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << /*variadic function*/ 1; + SKEPAttr->setInvalidAttr(); + } + + if (FD->isDefaulted()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << /*defaulted function*/ 3; + SKEPAttr->setInvalidAttr(); + } else if (FD->isDeleted()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << /*deleted function*/ 2; + SKEPAttr->setInvalidAttr(); + } + + if (FD->isConsteval()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << /*consteval function*/ 5; + SKEPAttr->setInvalidAttr(); + } else if (FD->isConstexpr()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << /*constexpr function*/ 4; + SKEPAttr->setInvalidAttr(); + } + + if (FD->isNoReturn()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid) + << /*function declared with the 'noreturn' attribute*/ 6; + SKEPAttr->setInvalidAttr(); + } + + if (FD->getReturnType()->isUndeducedType()) { + Diag(SKEPAttr->getLocation(), + diag::err_sycl_entry_point_deduced_return_type); + SKEPAttr->setInvalidAttr(); + } else if (!FD->getReturnType()->isDependentType() && + !FD->getReturnType()->isVoidType()) { + Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_return_type); + SKEPAttr->setInvalidAttr(); + } + + if (!FD->isInvalidDecl() && !FD->isTemplated() && + !SKEPAttr->isInvalidAttr()) { + const SYCLKernelInfo *SKI = + getASTContext().findSYCLKernelInfo(SKEPAttr->getKernelName()); + if (SKI) { + if (!declaresSameEntity(FD, SKI->getKernelEntryPointDecl())) { + // FIXME: This diagnostic should include the origin of the kernel + // FIXME: names; not just the locations of the conflicting declarations. + Diag(FD->getLocation(), diag::err_sycl_kernel_name_conflict); + Diag(SKI->getKernelEntryPointDecl()->getLocation(), + diag::note_previous_declaration); + SKEPAttr->setInvalidAttr(); + } + } else { + getASTContext().registerSYCLEntryPointFunction(FD); + } + } +} diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 8c60e85c93d70..dee5169ae5723 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -1134,8 +1134,19 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { // the presence of a sycl_kernel_entry_point attribute, register it so that // associated metadata is recreated. if (FD->hasAttr()) { + auto *SKEPAttr = FD->getAttr(); ASTContext &C = Reader.getContext(); - C.registerSYCLEntryPointFunction(FD); + const SYCLKernelInfo *SKI = C.findSYCLKernelInfo(SKEPAttr->getKernelName()); + if (SKI) { + if (!declaresSameEntity(FD, SKI->getKernelEntryPointDecl())) { + Reader.Diag(FD->getLocation(), diag::err_sycl_kernel_name_conflict); + Reader.Diag(SKI->getKernelEntryPointDecl()->getLocation(), + diag::note_previous_declaration); + SKEPAttr->setInvalidAttr(); + } + } else { + C.registerSYCLEntryPointFunction(FD); + } } } diff --git a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp index c351f3b7d03ea..0189cf0402d3a 100644 --- a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp +++ b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp @@ -107,14 +107,29 @@ void skep5>(long) { // CHECK: | |-TemplateArgument type 'long' // CHECK: | `-SYCLKernelEntryPointAttr {{.*}} KN<5, 2> +// FIXME: C++23 [temp.expl.spec]p12 states: +// FIXME: ... Similarly, attributes appearing in the declaration of a template +// FIXME: have no effect on an explicit specialization of that template. +// FIXME: Clang currently instantiates a function template specialization from +// FIXME: the function template declaration and links it as a previous +// FIXME: declaration of an explicit specialization. The instantiated +// FIXME: declaration includes attributes instantiated from the function +// FIXME: template declaration. When the instantiated declaration and the +// FIXME: explicit specialization both specify a sycl_kernel_entry_point +// FIXME: attribute with different kernel name types, a spurious diagnostic +// FIXME: is issued. The following test case is incorrectly diagnosed as +// FIXME: having conflicting kernel name types (KN<5,3> vs the incorrectly +// FIXME: inherited KN<5,-1>). +#if 0 template<> [[clang::sycl_kernel_entry_point(KN<5,3>)]] void skep5>(long long) { } -// CHECK: |-FunctionDecl {{.*}} prev {{.*}} skep5 'void (long long)' explicit_specialization -// CHECK-NEXT: | |-TemplateArgument type 'KN<5, -1>' -// CHECK: | |-TemplateArgument type 'long long' -// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} KN<5, 3> +// FIXME-CHECK: |-FunctionDecl {{.*}} prev {{.*}} skep5 'void (long long)' explicit_specialization +// FIXME-CHECK-NEXT: | |-TemplateArgument type 'KN<5, -1>' +// FIXME-CHECK: | |-TemplateArgument type 'long long' +// FIXME-CHECK: | `-SYCLKernelEntryPointAttr {{.*}} KN<5, 3> +#endif template void skep5>(int); // Checks are located with the primary template declaration above. diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp new file mode 100644 index 0000000000000..5b3cf9853173d --- /dev/null +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp @@ -0,0 +1,352 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-device -verify %s + +// These tests validate appertainment for the sycl_kernel_entry_point attribute. + +#if __cplusplus >= 202002L +// Mock coroutine support. +namespace std { + +template +struct coroutine_handle { + template + coroutine_handle(const coroutine_handle&); + static coroutine_handle from_address(void *addr); +}; + +template +struct coroutine_traits { + struct suspend_never { + bool await_ready() const noexcept; + void await_suspend(std::coroutine_handle<>) const noexcept; + void await_resume() const noexcept; + }; + struct promise_type { + void get_return_object() noexcept; + suspend_never initial_suspend() const noexcept; + suspend_never final_suspend() const noexcept; + void return_void() noexcept; + void unhandled_exception() noexcept; + }; +}; + +} +#endif + +// A unique kernel name type is required for each declared kernel entry point. +template struct KN; + + +//////////////////////////////////////////////////////////////////////////////// +// Valid declarations. +//////////////////////////////////////////////////////////////////////////////// + +// Function declaration with GNU attribute spelling +__attribute__((sycl_kernel_entry_point(KN<1>))) +void ok1(); + +// Function declaration with Clang attribute spelling. +[[clang::sycl_kernel_entry_point(KN<2>)]] +void ok2(); + +// Function definition. +[[clang::sycl_kernel_entry_point(KN<3>)]] +void ok3() {} + +// Function template definition. +template +[[clang::sycl_kernel_entry_point(KNT)]] +void ok4(T) {} + +// Function template explicit specialization. +template<> +[[clang::sycl_kernel_entry_point(KN<4,1>)]] +void ok4>(int) {} + +// Function template explicit instantiation. +template void ok4, long>(long); + +namespace NS { +// Function declaration at namespace scope. +[[clang::sycl_kernel_entry_point(KN<5>)]] +void ok5(); +} + +struct S6 { + // Static member function declaration. + [[clang::sycl_kernel_entry_point(KN<6>)]] + static void ok6(); +}; + +// Dependent hidden friend definition. +template +struct S7 { + [[clang::sycl_kernel_entry_point(KNT)]] + friend void ok7(S7) {} +}; +void test_ok7() { + ok7(S7>{}); +} + +// Non-dependent hidden friend definition. +struct S8Base {}; +template +struct S8 : S8Base { + [[clang::sycl_kernel_entry_point(KN<8>)]] + friend void ok8(const S8Base&) {} +}; +void test_ok8() { + ok8(S8{}); +} + +// The sycl_kernel_entry_point attribute must match across declarations and +// cannot be added for the first time after a definition. +[[clang::sycl_kernel_entry_point(KN<9>)]] +void ok9(); +[[clang::sycl_kernel_entry_point(KN<9>)]] +void ok9(); +[[clang::sycl_kernel_entry_point(KN<10>)]] +void ok10(); +void ok10() {} +void ok11(); +[[clang::sycl_kernel_entry_point(KN<11>)]] +void ok11() {} + +using VOID = void; +[[clang::sycl_kernel_entry_point(KN<12>)]] +VOID ok12(); +[[clang::sycl_kernel_entry_point(KN<13>)]] +const void ok13(); + +#if __cplusplus >= 202302L +auto ok14 = [] [[clang::sycl_kernel_entry_point(KN<14>)]] static -> void {}; +#endif + +template +struct S15 { + // Don't diagnose a dependent return type as a non-void type. + [[clang::sycl_kernel_entry_point(KNT)]] + static T ok15(); +}; + + +//////////////////////////////////////////////////////////////////////////////// +// Invalid declarations. +//////////////////////////////////////////////////////////////////////////////// + +// The sycl_kernel_entry_point attribute cannot appertain to main() because +// main() has a non-void return type. However, if the requirement for a void +// return type were to be relaxed or if an allowance was made for main() to +// return void (as gcc allows in some modes and as has been proposed to WG21 +// on occassion), main() still can't function as a SYCL kernel entry point, +// so this test ensures such attempted uses of the attribute are rejected. +struct Smain; +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions with a 'void' return type}} +[[clang::sycl_kernel_entry_point(Smain)]] +int main(); + +template struct BADKN; + +struct B1 { + // Non-static data member declaration. + // expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} + [[clang::sycl_kernel_entry_point(BADKN<1>)]] + int bad1; +}; + +struct B2 { + // Static data member declaration. + // expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} + [[clang::sycl_kernel_entry_point(BADKN<2>)]] + static int bad2; +}; + +struct B3 { + // Non-static member function declaration. + // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} + [[clang::sycl_kernel_entry_point(BADKN<3>)]] + void bad3(); +}; + +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} +namespace bad4 [[clang::sycl_kernel_entry_point(BADKN<4>)]] {} + +#if __cplusplus >= 202002L +// expected-error@+2 {{'sycl_kernel_entry_point' attribute only applies to functions}} +template +concept bad5 [[clang::sycl_kernel_entry_point(BADKN<5>)]] = true; +#endif + +// Type alias declarations. +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} +typedef void bad6 [[clang::sycl_kernel_entry_point(BADKN<6>)]] (); +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} +using bad7 [[clang::sycl_kernel_entry_point(BADKN<7>)]] = void(); +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} +using bad8 [[clang::sycl_kernel_entry_point(BADKN<8>)]] = int; +// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to types}} +using bad9 = int [[clang::sycl_kernel_entry_point(BADKN<9>)]]; +// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to types}} +using bad10 = int() [[clang::sycl_kernel_entry_point(BADKN<10>)]]; + +// Variable declaration. +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} +[[clang::sycl_kernel_entry_point(BADKN<11>)]] +int bad11; + +// Class declaration. +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} +struct [[clang::sycl_kernel_entry_point(BADKN<12>)]] bad12; + +// Enumeration declaration. +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} +enum [[clang::sycl_kernel_entry_point(BADKN<13>)]] bad13 {}; + +// Enumerator. +// expected-error@+2 {{'sycl_kernel_entry_point' attribute only applies to functions}} +enum { + bad14 [[clang::sycl_kernel_entry_point(BADKN<14>)]] +}; + +// Attribute added after the definition. +// expected-error@+3 {{'sycl_kernel_entry_point' attribute cannot be added to a function after the function is defined}} +// expected-note@+1 {{previous definition is here}} +void bad15() {} +[[clang::sycl_kernel_entry_point(BADKN<15>)]] +void bad15(); + +// The function must return void. +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions with a 'void' return type}} +[[clang::sycl_kernel_entry_point(BADKN<16>)]] +int bad16(); + +// Function parameters. +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} +void bad17(void (fp [[clang::sycl_kernel_entry_point(BADKN<17>)]])()); + +// Function template parameters. +// FIXME: Clang currently ignores attributes that appear in template parameters +// FIXME: and the C++ standard is unclear regarding whether such attributes are +// FIXME: permitted. P3324 (Attributes for namespace aliases, template +// FIXME: parameters, and lambda captures) seeks to clarify the situation. +// FIXME-expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} +template)]])()> +void bad18(); + +#if __cplusplus >= 202002L +// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a coroutine}} +[[clang::sycl_kernel_entry_point(BADKN<19>)]] +void bad19() { + co_return; +} +#endif + +struct B20 { + // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} + [[clang::sycl_kernel_entry_point(BADKN<20>)]] + B20(); +}; + +struct B21 { + // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} + [[clang::sycl_kernel_entry_point(BADKN<21>)]] + ~B21(); +}; + +// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a variadic function}} +[[clang::sycl_kernel_entry_point(BADKN<22>)]] +void bad22(...); + +// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a deleted function}} +[[clang::sycl_kernel_entry_point(BADKN<23>)]] +void bad23() = delete; + +// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a constexpr function}} +[[clang::sycl_kernel_entry_point(BADKN<24>)]] +constexpr void bad24() {} + +#if __cplusplus >= 202002L +// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a consteval function}} +[[clang::sycl_kernel_entry_point(BADKN<25>)]] +consteval void bad25() {} +#endif + +// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a function declared with the 'noreturn' attribute}} +[[clang::sycl_kernel_entry_point(BADKN<26>)]] +[[noreturn]] void bad26(); + +// expected-error@+3 {{attribute 'target' multiversioning cannot be combined with attribute 'sycl_kernel_entry_point'}} +__attribute__((target("avx"))) void bad27(); +[[clang::sycl_kernel_entry_point(BADKN<27>)]] +__attribute__((target("sse4.2"))) void bad27(); + +template +struct B28 { + // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a deleted function}} + [[clang::sycl_kernel_entry_point(KNT)]] + friend void bad28() = delete; +}; + +#if __cplusplus >= 202002L +template +struct B29 { + // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a defaulted function}} + [[clang::sycl_kernel_entry_point(KNT)]] + friend T operator==(B29, B29) = default; +}; +#endif + +#if __cplusplus >= 202002L +template +struct B30 { + // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a coroutine}} + [[clang::sycl_kernel_entry_point(KNT)]] + friend void bad30() { co_return; } +}; +#endif + +template +struct B31 { + // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a variadic function}} + [[clang::sycl_kernel_entry_point(KNT)]] + friend void bad31(...) {} +}; + +template +struct B32 { + // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a constexpr function}} + [[clang::sycl_kernel_entry_point(KNT)]] + friend constexpr void bad32() {} +}; + +#if __cplusplus >= 202002L +template +struct B33 { + // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a consteval function}} + [[clang::sycl_kernel_entry_point(KNT)]] + friend consteval void bad33() {} +}; +#endif + +template +struct B34 { + // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a function declared with the 'noreturn' attribute}} + [[clang::sycl_kernel_entry_point(KNT)]] + [[noreturn]] friend void bad34() {} +}; + +#if __cplusplus >= 202302L +// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}} +auto bad35 = [] [[clang::sycl_kernel_entry_point(BADKN<35>)]] -> void {}; +#endif + +#if __cplusplus >= 202302L +// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions with a non-deduced 'void' return type}} +auto bad36 = [] [[clang::sycl_kernel_entry_point(BADKN<36>)]] static {}; +#endif + +#if __cplusplus >= 202302L +// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a coroutine}} +auto bad37 = [] [[clang::sycl_kernel_entry_point(BADKN<37>)]] static -> void { co_return; }; +#endif diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp index c63d241163e61..14b5d2746631d 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp @@ -120,18 +120,3 @@ template concept C = true; // expected-error@+1 {{expected a type}} [[clang::sycl_kernel_entry_point(C)]] void bad11(); #endif - -struct B12; // #B12-decl -// FIXME: C++23 [temp.expl.spec]p12 states: -// FIXME: ... Similarly, attributes appearing in the declaration of a template -// FIXME: have no effect on an explicit specialization of that template. -// FIXME: Clang currently instantiates and propagates attributes from a function -// FIXME: template to its explicit specializations resulting in the following -// FIXME: spurious error. -// expected-error@+4 {{incomplete type 'B12' named in nested name specifier}} -// expected-note@+5 {{in instantiation of function template specialization 'bad12' requested here}} -// expected-note@#B12-decl {{forward declaration of 'B12'}} -template -[[clang::sycl_kernel_entry_point(typename T::not_found)]] void bad12() {} -template<> -void bad12() {} diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp new file mode 100644 index 0000000000000..83c3e5ca267ab --- /dev/null +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp @@ -0,0 +1,104 @@ +// Test that SYCL kernel name conflicts that occur across module boundaries are +// properly diagnosed and that declarations are properly merged so that spurious +// conflicts are not reported. + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t \ +// RUN: -std=c++17 -fsycl-is-host %t/test.cpp -verify +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t \ +// RUN: -std=c++17 -fsycl-is-device %t/test.cpp -verify + +#--- module.modulemap +module M1 { header "m1.h" } +module M2 { header "m2.h" } + + +#--- common.h +template struct KN; + +[[clang::sycl_kernel_entry_point(KN<1>)]] +void common_test1() {} + +template +[[clang::sycl_kernel_entry_point(T)]] +void common_test2() {} +template void common_test2>(); + + +#--- m1.h +#include "common.h" + +[[clang::sycl_kernel_entry_point(KN<3>)]] +void m1_test3() {} // << expected previous declaration note here. + +template +[[clang::sycl_kernel_entry_point(T)]] +void m1_test4() {} // << expected previous declaration note here. +template void m1_test4>(); + +[[clang::sycl_kernel_entry_point(KN<5>)]] +void m1_test5() {} // << expected previous declaration note here. + +template +[[clang::sycl_kernel_entry_point(T)]] +void m1_test6() {} // << expected previous declaration note here. +template void m1_test6>(); + + +#--- m2.h +#include "common.h" + +[[clang::sycl_kernel_entry_point(KN<3>)]] +void m2_test3() {} // << expected kernel name conflict here. + +template +[[clang::sycl_kernel_entry_point(T)]] +void m2_test4() {} // << expected kernel name conflict here. +template void m2_test4>(); + +[[clang::sycl_kernel_entry_point(KN<7>)]] +void m2_test7() {} // << expected previous declaration note here. + +template +[[clang::sycl_kernel_entry_point(T)]] +void m2_test8() {} // << expected previous declaration note here. +template void m2_test8>(); + + +#--- test.cpp +#include "m1.h" +#include "m2.h" + +// Expected diagnostics for m1_test3() and m2_test3(): +// expected-error@m2.h:4 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} +// expected-note@m1.h:12 {{previous declaration is here}} + +// Expected diagnostics for m1_test4>() and m2_test4>(): +// expected-error@m2.h:8 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} +// expected-note@m1.h:16 {{previous declaration is here}} + +// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} +// expected-note@m1.h:4 {{previous declaration is here}} +[[clang::sycl_kernel_entry_point(KN<5>)]] +void test5() {} + +// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} +// expected-note@m1.h:8 {{previous declaration is here}} +[[clang::sycl_kernel_entry_point(KN<6>)]] +void test6() {} + +// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} +// expected-note@m2.h:12 {{previous declaration is here}} +[[clang::sycl_kernel_entry_point(KN<7>)]] +void test7() {} + +// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} +// expected-note@m2.h:16 {{previous declaration is here}} +[[clang::sycl_kernel_entry_point(KN<8>)]] +void test8() {} + +void f() { + common_test1(); + common_test2>(); +} diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp new file mode 100644 index 0000000000000..0814d898d1c0e --- /dev/null +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp @@ -0,0 +1,36 @@ +// Test that SYCL kernel name conflicts that occur across PCH boundaries are +// properly diagnosed. + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -std=c++17 -fsycl-is-host -emit-pch -x c++-header \ +// RUN: %t/pch.h -o %t/pch.h.host.pch +// RUN: %clang_cc1 -std=c++17 -fsycl-is-host -verify \ +// RUN: -include-pch %t/pch.h.host.pch %t/test.cpp +// RUN: %clang_cc1 -std=c++17 -fsycl-is-device -emit-pch -x c++-header \ +// RUN: %t/pch.h -o %t/pch.h.device.pch +// RUN: %clang_cc1 -std=c++17 -fsycl-is-device -verify \ +// RUN: -include-pch %t/pch.h.device.pch %t/test.cpp + +#--- pch.h +template struct KN; + +[[clang::sycl_kernel_entry_point(KN<1>)]] +void pch_test1() {} // << expected previous declaration note here. + +template +[[clang::sycl_kernel_entry_point(T)]] +void pch_test2() {} // << expected previous declaration note here. +template void pch_test2>(); + + +#--- test.cpp +// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} +// expected-note@pch.h:4 {{previous declaration is here}} +[[clang::sycl_kernel_entry_point(KN<1>)]] +void test1() {} + +// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} +// expected-note@pch.h:8 {{previous declaration is here}} +[[clang::sycl_kernel_entry_point(KN<2>)]] +void test2() {} diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp new file mode 100644 index 0000000000000..78dd89696c02d --- /dev/null +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp @@ -0,0 +1,118 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s + +// These tests validate that the kernel name type argument provided to the +// sycl_kernel_entry_point attribute meets the requirements of a SYCL kernel +// name as described in section 5.2, "Naming of kernels", of the SYCL 2020 +// specification. + +struct S1; +// expected-warning@+3 {{redundant 'sycl_kernel_entry_point' attribute}} +// expected-note@+1 {{previous attribute is here}} +[[clang::sycl_kernel_entry_point(S1), + clang::sycl_kernel_entry_point(S1)]] +void ok1(); + +// expected-error@+1 {{'int' is not a valid SYCL kernel name type; a non-union class type is required}} +[[clang::sycl_kernel_entry_point(int)]] void bad2(); + +// expected-error@+1 {{'int ()' is not a valid SYCL kernel name type; a non-union class type is required}} +[[clang::sycl_kernel_entry_point(int())]] void bad3(); + +// expected-error@+1 {{'int (*)()' is not a valid SYCL kernel name type; a non-union class type is required}} +[[clang::sycl_kernel_entry_point(int(*)())]] void bad4(); + +// expected-error@+1 {{'int (&)()' is not a valid SYCL kernel name type; a non-union class type is required}} +[[clang::sycl_kernel_entry_point(int(&)())]] void bad5(); + +// expected-error@+1 {{'decltype(nullptr)' (aka 'std::nullptr_t') is not a valid SYCL kernel name type; a non-union class type is required}} +[[clang::sycl_kernel_entry_point(decltype(nullptr))]] void bad6(); + +union U7; // #U7-decl +// expected-error@+2 {{'U7' is not a valid SYCL kernel name type; a non-union class type is required}} +// expected-note@#U7-decl {{'U7' declared here}} +[[clang::sycl_kernel_entry_point(U7)]] void bad7(); + +enum E8 {}; // #E8-decl +// expected-error@+2 {{'E8' is not a valid SYCL kernel name type; a non-union class type is required}} +// expected-note@#E8-decl {{'E8' declared here}} +[[clang::sycl_kernel_entry_point(E8)]] void bad8(); + +enum E9 : int; // #E9-decl +// expected-error@+2 {{'E9' is not a valid SYCL kernel name type; a non-union class type is required}} +// expected-note@#E9-decl {{'E9' declared here}} +[[clang::sycl_kernel_entry_point(E9)]] void bad9(); + +struct B10 { + struct MS; +}; +// FIXME-expected-error@+1 {{'sycl_kernel_entry_point' attribute argument must be a forward declarable class type}} +[[clang::sycl_kernel_entry_point(B10::MS)]] void bad10(); + +struct B11 { + struct MS; +}; +// FIXME-expected-error@+3 {{'sycl_kernel_entry_point' attribute argument must be a forward declarable class type}} +template +[[clang::sycl_kernel_entry_point(typename T::MS)]] void bad11() {} +template void bad11(); + +template +[[clang::sycl_kernel_entry_point(T)]] void bad12(); +void f12() { + // FIXME-expected-error@+2 {{'sycl_kernel_entry_point' attribute argument must be a forward declarable class type}} + struct LS; + bad12(); +} + +struct B13_1; +struct B13_2; +// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B13_2' vs 'B13_1'}} +// expected-note@+1 {{'bad13' declared here}} +[[clang::sycl_kernel_entry_point(B13_1)]] void bad13(); +[[clang::sycl_kernel_entry_point(B13_2)]] void bad13() {} + +struct B14_1; +struct B14_2; +// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B14_2' vs 'B14_1'}} +// expected-note@+1 {{previous attribute is here}} +[[clang::sycl_kernel_entry_point(B14_1), + clang::sycl_kernel_entry_point(B14_2)]] +void bad14(); + +struct B15; +// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}} +// expected-note@+1 {{previous declaration is here}} +[[clang::sycl_kernel_entry_point(B15)]] void bad15_1(); +[[clang::sycl_kernel_entry_point(B15)]] void bad15_2(); + +struct B16_1; +struct B16_2; +// expected-error@+4 {{'sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B16_2' vs 'B16_1'}} +// expected-note@+1 {{'bad16' declared here}} +[[clang::sycl_kernel_entry_point(B16_1)]] void bad16(); +void bad16(); // The attribute from the previous declaration is inherited. +[[clang::sycl_kernel_entry_point(B16_2)]] void bad16(); + +template +struct B17 { + // expected-error@+1 {{'int' is not a valid SYCL kernel name type; a non-union class type is required}} + [[clang::sycl_kernel_entry_point(int)]] + static void bad17(); +}; + +template +struct B18 { + // expected-error@+1 {{'int' is not a valid SYCL kernel name type; a non-union class type is required}} + [[clang::sycl_kernel_entry_point(int)]] + friend void bad18() {} +}; + +template +struct B19 { + // expected-error@+1 {{'int' is not a valid SYCL kernel name type; a non-union class type is required}} + [[clang::sycl_kernel_entry_point(KNT)]] + friend void bad19() {} +}; +// expected-note@+1 {{in instantiation of template class 'B19' requested here}} +B19 b19; diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp new file mode 100644 index 0000000000000..4c61570419629 --- /dev/null +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp @@ -0,0 +1,65 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s + +// These tests are intended to validate that a sycl_kernel_entry_point attribute +// appearing in the declaration of a function template does not affect overload +// resolution or cause spurious errors during overload resolution due to either +// a substitution failure in the attribute argument or a semantic check of the +// attribute during instantiation of a specialization unless that specialization +// is selected by overload resolution. + +// FIXME: C++23 [temp.expl.spec]p12 states: +// FIXME: ... Similarly, attributes appearing in the declaration of a template +// FIXME: have no effect on an explicit specialization of that template. +// FIXME: Clang currently instantiates and propagates attributes from a function +// FIXME: template to its explicit specializations resulting in the following +// FIXME: spurious error. +struct S1; // #S1-decl +// expected-error@+4 {{incomplete type 'S1' named in nested name specifier}} +// expected-note@+5 {{in instantiation of function template specialization 'ok1' requested here}} +// expected-note@#S1-decl {{forward declaration of 'S1'}} +template +[[clang::sycl_kernel_entry_point(typename T::invalid)]] void ok1() {} +template<> +void ok1() {} +void test_ok1() { + // ok1() is not a call to a SYCL kernel entry point function. + ok1(); +} + +// FIXME: The sycl_kernel_entry_point attribute should not be instantiated +// FIXME: until after overload resolution has completed. +struct S2; // #S2-decl +// expected-error@+6 {{incomplete type 'S2' named in nested name specifier}} +// expected-note@+10 {{in instantiation of function template specialization 'ok2' requested here}} +// expected-note@#S2-decl {{forward declaration of 'S2'}} +template +[[clang::sycl_kernel_entry_point(T)]] void ok2(int) {} +template +[[clang::sycl_kernel_entry_point(typename T::invalid)]] void ok2(long) {} +void test_ok2() { + // ok2(int) is a better match and is therefore selected by overload + // resolution; the attempted instantiation of ok2(long) should not produce + // an error for the substitution failure into the attribute argument. + ok2(2); +} + +// FIXME: The sycl_kernel_entry_point attribute should not be instantiated +// FIXME: until after overload resolution has completed. +struct S3; +struct Select3 { + using bad_type = int; + using good_type = S3; +}; +// expected-error@+5 {{'typename Select3::bad_type' (aka 'int') is not a valid SYCL kernel name type; a non-union class type is required}} +// expected-note@+9 {{in instantiation of function template specialization 'ok3' requested here}} +template +[[clang::sycl_kernel_entry_point(typename T::good_type)]] void ok3(int) {} +template +[[clang::sycl_kernel_entry_point(typename T::bad_type)]] void ok3(long) {} +void test_ok3() { + // ok3(int) is a better match and is therefore selected by overload + // resolution; the attempted instantiation of ok3(long) should not produce + // an error for the invalid kernel name provided as the attribute argument. + ok3(2); +} From 0acdba83ae5098c5c8f09f53aba4df37d97f3cf0 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 9 Jan 2025 14:46:34 -0600 Subject: [PATCH 57/79] [libc] Remove leftover 'gpu/' source directory (#122368) Summary: This isn't used anymore, I moved the GPU extensions into `offload/`. --- libc/src/CMakeLists.txt | 4 ---- libc/src/gpu/CMakeLists.txt | 10 ---------- libc/src/gpu/rpc_host_call.cpp | 35 ---------------------------------- libc/src/gpu/rpc_host_call.h | 21 -------------------- 4 files changed, 70 deletions(-) delete mode 100644 libc/src/gpu/CMakeLists.txt delete mode 100644 libc/src/gpu/rpc_host_call.cpp delete mode 100644 libc/src/gpu/rpc_host_call.h diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt index 9fc331ad18a39..32308ba147940 100644 --- a/libc/src/CMakeLists.txt +++ b/libc/src/CMakeLists.txt @@ -26,10 +26,6 @@ if(${LIBC_TARGET_OS} STREQUAL "linux") add_subdirectory(unistd) endif() -if(${LIBC_TARGET_OS} STREQUAL "gpu") - add_subdirectory(gpu) -endif() - if(NOT LLVM_LIBC_FULL_BUILD) return() endif() diff --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt deleted file mode 100644 index e20228516b511..0000000000000 --- a/libc/src/gpu/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -add_entrypoint_object( - rpc_host_call - SRCS - rpc_host_call.cpp - HDRS - rpc_host_call.h - DEPENDS - libc.src.__support.RPC.rpc_client - libc.src.__support.GPU.utils -) diff --git a/libc/src/gpu/rpc_host_call.cpp b/libc/src/gpu/rpc_host_call.cpp deleted file mode 100644 index 676031d16e154..0000000000000 --- a/libc/src/gpu/rpc_host_call.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===---------- GPU implementation of the external RPC call function ------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/gpu/rpc_host_call.h" - -#include "src/__support/GPU/utils.h" -#include "src/__support/RPC/rpc_client.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -// This calls the associated function pointer on the RPC server with the given -// arguments. We expect that the pointer here is a valid pointer on the server. -LLVM_LIBC_FUNCTION(unsigned long long, rpc_host_call, - (void *fn, void *data, size_t size)) { - rpc::Client::Port port = rpc::client.open(); - port.send_n(data, size); - port.send([=](rpc::Buffer *buffer, uint32_t) { - buffer->data[0] = reinterpret_cast(fn); - }); - unsigned long long ret; - port.recv([&](rpc::Buffer *buffer, uint32_t) { - ret = static_cast(buffer->data[0]); - }); - port.close(); - return ret; -} - -} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/gpu/rpc_host_call.h b/libc/src/gpu/rpc_host_call.h deleted file mode 100644 index 861149dead561..0000000000000 --- a/libc/src/gpu/rpc_host_call.h +++ /dev/null @@ -1,21 +0,0 @@ -//===-- Implementation header for RPC functions -----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H -#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H - -#include "src/__support/macros/config.h" -#include // size_t - -namespace LIBC_NAMESPACE_DECL { - -unsigned long long rpc_host_call(void *fn, void *buffer, size_t size); - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H From 84087226fa38b212325c651f1bc3caa79491bc80 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Thu, 9 Jan 2025 21:48:16 +0100 Subject: [PATCH 58/79] [LLD][COFF] Emit base relocation for native CHPE metadata pointer on ARM64X (#121500) --- lld/COFF/Chunks.cpp | 16 ++++++++++++++++ lld/test/COFF/arm64x-loadconfig.s | 6 +++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index ec0fdf0b67b38..d87a4d8d1ae79 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -564,6 +564,22 @@ void SectionChunk::getBaserels(std::vector *res) { continue; res->emplace_back(rva + rel.VirtualAddress, ty); } + + // Insert a 64-bit relocation for CHPEMetadataPointer in the native load + // config of a hybrid ARM64X image. Its value will be set in prepareLoadConfig + // to match the value in the EC load config, which is expected to be + // a relocatable pointer to the __chpe_metadata symbol. + COFFLinkerContext &ctx = file->symtab.ctx; + if (ctx.hybridSymtab && ctx.symtab.loadConfigSym && + ctx.symtab.loadConfigSym->getChunk() == this && + ctx.hybridSymtab->loadConfigSym && + ctx.symtab.loadConfigSize >= + offsetof(coff_load_configuration64, CHPEMetadataPointer) + + sizeof(coff_load_configuration64::CHPEMetadataPointer)) + res->emplace_back( + ctx.symtab.loadConfigSym->getRVA() + + offsetof(coff_load_configuration64, CHPEMetadataPointer), + IMAGE_REL_BASED_DIR64); } // MinGW specific. diff --git a/lld/test/COFF/arm64x-loadconfig.s b/lld/test/COFF/arm64x-loadconfig.s index d21f4bfe95b84..3b53d32a9b549 100644 --- a/lld/test/COFF/arm64x-loadconfig.s +++ b/lld/test/COFF/arm64x-loadconfig.s @@ -118,10 +118,14 @@ // BASERELOC: BaseReloc [ // BASERELOC-NEXT: Entry { // BASERELOC-NEXT: Type: DIR64 +// BASERELOC-NEXT: Address: 0x10C8 +// BASERELOC-NEXT: } +// BASERELOC-NEXT: Entry { +// BASERELOC-NEXT: Type: DIR64 // BASERELOC-NEXT: Address: 0x1208 // BASERELOC-NEXT: } // BASERELOC-NEXT: Entry { -// BASERELOC: Type: DIR64 +// BASERELOC-NEXT: Type: DIR64 // BASERELOC-NEXT: Address: 0x2074 // BASERELOC-NEXT: } From e8c8543a1c728278bf323d34e451bcf9042d9257 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 9 Jan 2025 20:55:36 +0000 Subject: [PATCH 59/79] [TySan] Intercept malloc_size on Apple platforms. (#122133) After https://github.com/llvm/llvm-project/pull/120563 malloc_size also needs intercepting on Apple platforms, otherwise all type-sanitized binaries crash on startup with an objc error: realized class 0x12345 has corrupt data pointer: malloc_size(0x567) = 0 PR: https://github.com/llvm/llvm-project/pull/122133 --- .../sanitizer_common/sanitizer_allocator_dlsym.h | 14 ++++++++------ compiler-rt/lib/tysan/tysan_interceptors.cpp | 8 ++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h index b360478a058a5..6e6cdbd9eeaed 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h @@ -36,21 +36,19 @@ struct DlSymAllocator { static void *Allocate(uptr size_in_bytes, uptr align = kWordSize) { void *ptr = InternalAlloc(size_in_bytes, nullptr, align); CHECK(internal_allocator()->FromPrimary(ptr)); - Details::OnAllocate(ptr, - internal_allocator()->GetActuallyAllocatedSize(ptr)); + Details::OnAllocate(ptr, GetSize(ptr)); return ptr; } static void *Callocate(usize nmemb, usize size) { void *ptr = InternalCalloc(nmemb, size); CHECK(internal_allocator()->FromPrimary(ptr)); - Details::OnAllocate(ptr, - internal_allocator()->GetActuallyAllocatedSize(ptr)); + Details::OnAllocate(ptr, GetSize(ptr)); return ptr; } static void Free(void *ptr) { - uptr size = internal_allocator()->GetActuallyAllocatedSize(ptr); + uptr size = GetSize(ptr); Details::OnFree(ptr, size); InternalFree(ptr); } @@ -63,7 +61,7 @@ struct DlSymAllocator { Free(ptr); return nullptr; } - uptr size = internal_allocator()->GetActuallyAllocatedSize(ptr); + uptr size = GetSize(ptr); uptr memcpy_size = Min(new_size, size); void *new_ptr = Allocate(new_size); if (new_ptr) @@ -77,6 +75,10 @@ struct DlSymAllocator { return Realloc(ptr, count * size); } + static uptr GetSize(void *ptr) { + return internal_allocator()->GetActuallyAllocatedSize(ptr); + } + static void OnAllocate(const void *ptr, uptr size) {} static void OnFree(const void *ptr, uptr size) {} }; diff --git a/compiler-rt/lib/tysan/tysan_interceptors.cpp b/compiler-rt/lib/tysan/tysan_interceptors.cpp index 08b1010a48ecf..a9c55a3ae0cf0 100644 --- a/compiler-rt/lib/tysan/tysan_interceptors.cpp +++ b/compiler-rt/lib/tysan/tysan_interceptors.cpp @@ -108,6 +108,14 @@ INTERCEPTOR(void *, malloc, uptr size) { return res; } +#if SANITIZER_APPLE +INTERCEPTOR(uptr, malloc_size, void *ptr) { + if (DlsymAlloc::PointerIsMine(ptr)) + return DlsymAlloc::GetSize(ptr); + return REAL(malloc_size)(ptr); +} +#endif + INTERCEPTOR(void *, realloc, void *ptr, uptr size) { if (DlsymAlloc::Use() || DlsymAlloc::PointerIsMine(ptr)) return DlsymAlloc::Realloc(ptr, size); From d01ae567742c4d83b67483e15eb74c0ecd2e8270 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Thu, 9 Jan 2025 12:59:57 -0800 Subject: [PATCH 60/79] =?UTF-8?q?Revert=20"[Exegesis]=20Add=20the=20abilit?= =?UTF-8?q?y=20to=20dry-run=20the=20measurement=20phase=20(=E2=80=A6=20(#1?= =?UTF-8?q?22371)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …#121991)" This reverts commit f8f8598fd886cddfd374fa43eb6d7d37d301b576. This breaks ARMv7 and s390x buildbot with the following message: ``` llvm-exegesis error: No available targets are compatible with triple "armv8l-unknown-linux-gnueabihf" FileCheck error: '' is empty. FileCheck command line: /home/tcwg-buildbot/worker/clang-armv7-2stage/stage2/bin/FileCheck /home/tcwg-buildbot/worker/clang-armv7-2stage/llvm/llvm/test/tools/llvm-exegesis/dry-run-measurement.test ``` --- llvm/docs/CommandGuide/llvm-exegesis.rst | 1 - .../llvm-exegesis/dry-run-measurement.test | 11 ------- .../tools/llvm-exegesis/lib/BenchmarkResult.h | 1 - .../llvm-exegesis/lib/BenchmarkRunner.cpp | 33 +++++-------------- llvm/tools/llvm-exegesis/lib/Target.cpp | 4 +-- llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 9 ++--- 6 files changed, 13 insertions(+), 46 deletions(-) delete mode 100644 llvm/test/tools/llvm-exegesis/dry-run-measurement.test diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst index d357c2ceea418..8266d891a5e6b 100644 --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -301,7 +301,6 @@ OPTIONS * ``prepare-and-assemble-snippet``: Same as ``prepare-snippet``, but also dumps an excerpt of the sequence (hex encoded). * ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``. but also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``. * ``measure``: Same as ``assemble-measured-code``, but also runs the measurement. - * ``dry-run-measurement``: Same as measure, but does not actually execute the snippet. .. option:: --x86-lbr-sample-period= diff --git a/llvm/test/tools/llvm-exegesis/dry-run-measurement.test b/llvm/test/tools/llvm-exegesis/dry-run-measurement.test deleted file mode 100644 index e4449d7df3d82..0000000000000 --- a/llvm/test/tools/llvm-exegesis/dry-run-measurement.test +++ /dev/null @@ -1,11 +0,0 @@ -# RUN: llvm-exegesis --mtriple=riscv64 --mcpu=sifive-p470 --mode=latency --opcode-name=ADD --use-dummy-perf-counters --benchmark-phase=dry-run-measurement | FileCheck %s -# REQUIRES: riscv-registered-target - -# This test makes sure that llvm-exegesis doesn't execute "cross-compiled" snippets in the presence of -# --dry-run-measurement. RISC-V was chosen simply because most of the time we run tests on X86 machines. - -# Should not contain misleading results. -# CHECK: measurements: [] - -# Should not contain error messages like "snippet crashed while running: Segmentation fault". -# CHECK: error: '' diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h index 5480d85616878..3c09a8380146e 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -38,7 +38,6 @@ enum class BenchmarkPhaseSelectorE { PrepareAndAssembleSnippet, AssembleMeasuredCode, Measure, - DryRunMeasure, }; enum class BenchmarkFilter { All, RegOnly, WithMem }; diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp index cc46f7feb6cf7..a7771b99e97b1 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -99,7 +99,7 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { static Expected> create(const LLVMState &State, object::OwningBinary Obj, BenchmarkRunner::ScratchSpace *Scratch, - std::optional BenchmarkProcessCPU, bool DryRun) { + std::optional BenchmarkProcessCPU) { Expected EF = ExecutableFunction::create(State.createTargetMachine(), std::move(Obj)); @@ -107,17 +107,14 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { return EF.takeError(); return std::unique_ptr( - new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch, - DryRun)); + new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch)); } private: InProcessFunctionExecutorImpl(const LLVMState &State, ExecutableFunction Function, - BenchmarkRunner::ScratchSpace *Scratch, - bool DryRun) - : State(State), Function(std::move(Function)), Scratch(Scratch), - DryRun(DryRun) {} + BenchmarkRunner::ScratchSpace *Scratch) + : State(State), Function(std::move(Function)), Scratch(Scratch) {} static void accumulateCounterValues(const SmallVector &NewValues, SmallVector *Result) { @@ -146,14 +143,9 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { CrashRecoveryContext CRC; CrashRecoveryContext::Enable(); const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() { - if (DryRun) { - Counter->start(); - Counter->stop(); - } else { - Counter->start(); - this->Function(ScratchPtr); - Counter->stop(); - } + Counter->start(); + this->Function(ScratchPtr); + Counter->stop(); }); CrashRecoveryContext::Disable(); PS.reset(); @@ -185,7 +177,6 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { const LLVMState &State; const ExecutableFunction Function; BenchmarkRunner::ScratchSpace *const Scratch; - bool DryRun = false; }; #ifdef __linux__ @@ -673,9 +664,6 @@ Expected> BenchmarkRunner::createFunctionExecutor( object::OwningBinary ObjectFile, const BenchmarkKey &Key, std::optional BenchmarkProcessCPU) const { - bool DryRun = - BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::DryRunMeasure; - switch (ExecutionMode) { case ExecutionModeE::InProcess: { if (BenchmarkProcessCPU.has_value()) @@ -683,8 +671,7 @@ BenchmarkRunner::createFunctionExecutor( "support benchmark core pinning."); auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create( - State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU, - DryRun); + State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU); if (!InProcessExecutorOrErr) return InProcessExecutorOrErr.takeError(); @@ -692,10 +679,6 @@ BenchmarkRunner::createFunctionExecutor( } case ExecutionModeE::SubProcess: { #ifdef __linux__ - if (DryRun) - return make_error("The subprocess execution mode cannot " - "dry-run measurement at this moment."); - auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create( State, std::move(ObjectFile), Key, BenchmarkProcessCPU); if (!SubProcessExecutorOrErr) diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp index e2251ff978888..29e58692f0e92 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/Target.cpp @@ -98,7 +98,7 @@ ExegesisTarget::createBenchmarkRunner( return nullptr; case Benchmark::Latency: case Benchmark::InverseThroughput: - if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure && + if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure && !PfmCounters.CycleCounter) { const char *ModeName = Mode == Benchmark::Latency ? "latency" @@ -116,7 +116,7 @@ ExegesisTarget::createBenchmarkRunner( State, Mode, BenchmarkPhaseSelector, ResultAggMode, ExecutionMode, ValidationCounters, BenchmarkRepeatCount); case Benchmark::Uops: - if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure && + if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure && !PfmCounters.UopsCounter && !PfmCounters.IssueCounters) return make_error( "can't run 'uops' mode, sched model does not define uops or issue " diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index 07bd44ee64f1f..fa37e05956be8 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -132,10 +132,7 @@ static cl::opt BenchmarkPhaseSelector( clEnumValN( BenchmarkPhaseSelectorE::Measure, "measure", "Same as prepare-measured-code, but also runs the measurement " - "(default)"), - clEnumValN( - BenchmarkPhaseSelectorE::DryRunMeasure, "dry-run-measurement", - "Same as measure, but does not actually execute the snippet")), + "(default)")), cl::init(BenchmarkPhaseSelectorE::Measure)); static cl::opt @@ -479,7 +476,7 @@ static void runBenchmarkConfigurations( } void benchmarkMain() { - if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure && + if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure && !UseDummyPerfCounters) { #ifndef HAVE_LIBPFM ExitWithError( @@ -504,7 +501,7 @@ void benchmarkMain() { // Preliminary check to ensure features needed for requested // benchmark mode are present on target CPU and/or OS. - if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure) + if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure) ExitOnErr(State.getExegesisTarget().checkFeatureSupport()); if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess && From f764e71b7017e7264fffc410f5e10ef959e49294 Mon Sep 17 00:00:00 2001 From: Victor Mustya Date: Thu, 9 Jan 2025 13:03:16 -0800 Subject: [PATCH 61/79] [Clang][TableGen] Add missing __bf16 type to the builtins parser (#120662) The Clang tablegen built-in function prototype parser has the `__bf16` type missing. This patch adds the missing type to the parser. --- clang/test/TableGen/target-builtins-prototype-parser.td | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/clang/test/TableGen/target-builtins-prototype-parser.td b/clang/test/TableGen/target-builtins-prototype-parser.td index 555aebb3ccfb1..a753f906a674f 100644 --- a/clang/test/TableGen/target-builtins-prototype-parser.td +++ b/clang/test/TableGen/target-builtins-prototype-parser.td @@ -57,6 +57,12 @@ def : Builtin { let Spellings = ["__builtin_08"]; } +def : Builtin { +// CHECK: BUILTIN(__builtin_09, "V2yy", "") + let Prototype = "_Vector<2, __bf16>(__bf16)"; + let Spellings = ["__builtin_09"]; +} + #ifdef ERROR_EXPECTED_LANES def : Builtin { // ERROR_EXPECTED_LANES: :[[# @LINE + 1]]:7: error: Expected number of lanes after '_ExtVector<' @@ -112,4 +118,3 @@ def : Builtin { let Spellings = ["__builtin_test_use_clang_extended_vectors"]; } #endif - From c492a228e9227e2e44671b0f345fee9de62517bd Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 9 Jan 2025 16:03:34 -0500 Subject: [PATCH 62/79] [libc++] Add missing _LIBCPP_NODEBUG on internal aliases --- libcxx/include/__algorithm/radix_sort.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h index c39b26a8620f4..95f04a8bb31f6 100644 --- a/libcxx/include/__algorithm/radix_sort.h +++ b/libcxx/include/__algorithm/radix_sort.h @@ -88,10 +88,10 @@ __partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator template struct __radix_sort_traits { - using __image_type = decay_t::type>; + using __image_type _LIBCPP_NODEBUG = decay_t::type>; static_assert(is_unsigned<__image_type>::value); - using __radix_type = decay_t::type>; + using __radix_type _LIBCPP_NODEBUG = decay_t::type>; static_assert(is_integral<__radix_type>::value); static constexpr auto __radix_value_range = numeric_limits<__radix_type>::max() + 1; @@ -101,7 +101,7 @@ struct __radix_sort_traits { template struct __counting_sort_traits { - using __image_type = decay_t::type>; + using __image_type _LIBCPP_NODEBUG = decay_t::type>; static_assert(is_unsigned<__image_type>::value); static constexpr const auto __value_range = numeric_limits<__image_type>::max() + 1; From 328c3a843f886f3768e536a508e1e3723d834b3e Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 16:10:40 -0500 Subject: [PATCH 63/79] [RISCV][VLOPT] Add vmerge to isSupportedInstr (#122340) --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 4 ++ .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 3 +- .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 2 - .../CodeGen/RISCV/rvv/narrow-shift-extend.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 60 +++++++++++++++++++ 5 files changed, 66 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 89684decde6c1..ab73b112e6dd8 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -944,6 +944,10 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VMADD_VX: case RISCV::VNMSUB_VV: case RISCV::VNMSUB_VX: + // Vector Integer Merge Instructions + case RISCV::VMERGE_VIM: + case RISCV::VMERGE_VVM: + case RISCV::VMERGE_VXM: // Vector Widening Integer Multiply-Add Instructions case RISCV::VWMACCU_VV: case RISCV::VWMACCU_VX: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 3bed5cd04f214..c628a0d620498 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -697,7 +697,7 @@ define void @buildvec_seq_v9i8(ptr %x) { ; CHECK-LABEL: buildvec_seq_v9i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 73 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 @@ -706,7 +706,6 @@ define void @buildvec_seq_v9i8(ptr %x) { ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 2, v0 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 80e462c937690..392709fdb4cf7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1290,10 +1290,8 @@ define void @mulhs_v6i16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: li a1, 22 ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -7 ; CHECK-NEXT: vmerge.vim v9, v9, 7, v0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll index 3fbe635576c9b..53f8a2503354c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll +++ b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll @@ -142,10 +142,9 @@ entry: define @test_vloxei7(ptr %ptr, %offset, i64 %vl) { ; CHECK-LABEL: test_vloxei7: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vsll.vi v12, v8, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vloxei64.v v8, (a0), v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 46fbba35c35a2..da7917cae8ba6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3535,3 +3535,63 @@ define @vmfgt_vv( %a, %b %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) ret %2 } + +define @vmerge_vvm( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmerge_vvm: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmerge.vvm v8, v8, v10, v0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmerge_vvm: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmerge.vvm v8, v8, v10, v0 +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmerge.nxv4i32.nxv4i32( poison, %a, %b, %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vmerge_vxm( %a, i32 %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmerge_vxm: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmerge.vxm v8, v8, a0, v0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmerge_vxm: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vmerge.vxm v8, v8, a0, v0 +; VLOPT-NEXT: vadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmerge.nxv4i32.i32( poison, %a, i32 %b, %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %1, iXLen %vl) + ret %2 +} + +define @vmerge_vim( %a, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmerge_vim: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmerge.vim v8, v8, 9, v0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmerge_vim: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmerge.vim v8, v8, 9, v0 +; VLOPT-NEXT: vadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmerge.nxv4i32.i32( poison, %a, i32 9, %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %1, iXLen %vl) + ret %2 +} From 03eb786f75e36e9e203e0092ec3c6c589fd53c4f Mon Sep 17 00:00:00 2001 From: Tom Honermann Date: Thu, 9 Jan 2025 16:40:04 -0500 Subject: [PATCH 64/79] [SYCL] Correct misplaced sycl_kernel_entry_point attribute in a namespace declaration of a negative test. (#122375) Commit 1a73654b3212b623ac21b9deb3aeaadc6906b7e4 added a missing diagnostic for incorrect placement of an attribute in a namespace declaration. This change corrects a SYCL test that inadvertently exercised the `sycl_kernel_entry_point` attribute in the wrong declaration location. --- .../SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp index 5b3cf9853173d..a87af7ca298ac 100644 --- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp +++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp @@ -170,7 +170,7 @@ struct B3 { }; // expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}} -namespace bad4 [[clang::sycl_kernel_entry_point(BADKN<4>)]] {} +namespace [[clang::sycl_kernel_entry_point(BADKN<4>)]] bad4 {} #if __cplusplus >= 202002L // expected-error@+2 {{'sycl_kernel_entry_point' attribute only applies to functions}} From 0efb376c20b220cddbbcf57f0c82ae048170ffd9 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 9 Jan 2025 13:40:54 -0800 Subject: [PATCH 65/79] [libc][test] remove C++ stdlib includes (#122369) These make cross compiling the test suite more difficult, as you need the sysroot to contain these headers and libraries cross compiled for your target. It's straightforward to stick with the corresponding C headers. --- libc/test/UnitTest/ExecuteFunctionUnix.cpp | 17 ++++++++--------- libc/test/UnitTest/FPExceptMatcher.cpp | 7 +++---- libc/test/UnitTest/LibcDeathTestExecutors.cpp | 2 +- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/libc/test/UnitTest/ExecuteFunctionUnix.cpp b/libc/test/UnitTest/ExecuteFunctionUnix.cpp index 3a657c00851c7..df71738668592 100644 --- a/libc/test/UnitTest/ExecuteFunctionUnix.cpp +++ b/libc/test/UnitTest/ExecuteFunctionUnix.cpp @@ -8,13 +8,13 @@ #include "ExecuteFunction.h" #include "src/__support/macros/config.h" -#include -#include -#include -#include -#include +#include "test/UnitTest/ExecuteFunction.h" // FunctionCaller +#include #include #include +#include +#include +#include #include #include @@ -35,21 +35,20 @@ int ProcessStatus::get_fatal_signal() { } ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) { - std::unique_ptr X(func); int pipe_fds[2]; if (::pipe(pipe_fds) == -1) return ProcessStatus::error("pipe(2) failed"); // Don't copy the buffers into the child process and print twice. - std::cout.flush(); - std::cerr.flush(); + ::fflush(stderr); + ::fflush(stdout); pid_t pid = ::fork(); if (pid == -1) return ProcessStatus::error("fork(2) failed"); if (!pid) { (*func)(); - std::exit(0); + ::exit(0); } ::close(pipe_fds[1]); diff --git a/libc/test/UnitTest/FPExceptMatcher.cpp b/libc/test/UnitTest/FPExceptMatcher.cpp index 37ba0a0a7abde..889f9f6f44a91 100644 --- a/libc/test/UnitTest/FPExceptMatcher.cpp +++ b/libc/test/UnitTest/FPExceptMatcher.cpp @@ -9,11 +9,11 @@ #include "FPExceptMatcher.h" #include "src/__support/macros/config.h" +#include "test/UnitTest/ExecuteFunction.h" // FunctionCaller #include "test/UnitTest/Test.h" #include "hdr/types/fenv_t.h" #include "src/__support/FPUtil/FEnvImpl.h" -#include #include #include @@ -37,14 +37,13 @@ static void sigfpeHandler(int sig) { } FPExceptMatcher::FPExceptMatcher(FunctionCaller *func) { - auto oldSIGFPEHandler = signal(SIGFPE, &sigfpeHandler); - std::unique_ptr funcUP(func); + sighandler_t oldSIGFPEHandler = signal(SIGFPE, &sigfpeHandler); caughtExcept = false; fenv_t oldEnv; fputil::get_env(&oldEnv); if (sigsetjmp(jumpBuffer, 1) == 0) - funcUP->call(); + func->call(); // We restore the previous floating point environment after // the call to the function which can potentially raise SIGFPE. fputil::set_env(&oldEnv); diff --git a/libc/test/UnitTest/LibcDeathTestExecutors.cpp b/libc/test/UnitTest/LibcDeathTestExecutors.cpp index 77b0559c7acea..943e2c23c5fde 100644 --- a/libc/test/UnitTest/LibcDeathTestExecutors.cpp +++ b/libc/test/UnitTest/LibcDeathTestExecutors.cpp @@ -12,7 +12,7 @@ #include "test/UnitTest/ExecuteFunction.h" #include "test/UnitTest/TestLogger.h" -#include +#include namespace { constexpr unsigned TIMEOUT_MS = 10000; From 4f42e165164ba2bfca7b87be2a533ef09e8777e0 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Thu, 9 Jan 2025 13:48:26 -0800 Subject: [PATCH 66/79] [hwasan] Omit tag check for null pointers (#122206) If the pointer to be checked is statically known to be zero, the tag check will always pass since: 1) the tag is zero 2) shadow memory for address 0 is initialized to 0 and never updated. We can therefore elide the tag check. We perform the elision in two places: 1) the HWASan pass 2) when lowering the CHECK_MEMACCESS intrinsic. Conceivably, the HWASan pass may encounter a "cannot currently statically prove to be null" pointer (and is therefore unable to omit the intrinsic) that later optimization passes convert into a statically known-null pointer. As a last line of defense, we perform elision here too. This also updates the tests from https://github.com/llvm/llvm-project/pull/122186 --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 9 +++++++++ .../Instrumentation/HWAddressSanitizer.cpp | 20 +++++++++++++++---- llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll | 12 ++++++----- .../HWAddressSanitizer/zero-ptr.ll | 4 +--- 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 9bec782ca8ce9..9d9d9889b3858 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -605,6 +605,15 @@ void AArch64AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) { Register Reg = MI.getOperand(0).getReg(); + + // The HWASan pass won't emit a CHECK_MEMACCESS intrinsic with a pointer + // statically known to be zero. However, conceivably, the HWASan pass may + // encounter a "cannot currently statically prove to be null" pointer (and is + // therefore unable to omit the intrinsic) that later optimization passes + // convert into a statically known-null pointer. + if (Reg == AArch64::XZR) + return; + bool IsShort = ((MI.getOpcode() == AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES) || (MI.getOpcode() == diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 2031728c2f33d..75a19357ea1bb 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -348,7 +348,7 @@ class HWAddressSanitizer { bool ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, MemIntrinsic *MI); void instrumentMemIntrinsic(MemIntrinsic *MI); bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU, - LoopInfo *LI); + LoopInfo *LI, const DataLayout &DL); bool ignoreAccessWithoutRemark(Instruction *Inst, Value *Ptr); bool ignoreAccess(OptimizationRemarkEmitter &ORE, Instruction *Inst, Value *Ptr); @@ -1163,12 +1163,23 @@ void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { } bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O, - DomTreeUpdater &DTU, - LoopInfo *LI) { + DomTreeUpdater &DTU, LoopInfo *LI, + const DataLayout &DL) { Value *Addr = O.getPtr(); LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n"); + // If the pointer is statically known to be zero, the tag check will pass + // since: + // 1) it has a zero tag + // 2) the shadow memory corresponding to address 0 is initialized to zero and + // never updated. + // We can therefore elide the tag check. + llvm::KnownBits Known(DL.getPointerTypeSizeInBits(Addr->getType())); + llvm::computeKnownBits(Addr, Known, DL); + if (Known.isZero()) + return false; + if (O.MaybeMask) return false; // FIXME @@ -1701,8 +1712,9 @@ void HWAddressSanitizer::sanitizeFunction(Function &F, PostDominatorTree *PDT = FAM.getCachedResult(F); LoopInfo *LI = FAM.getCachedResult(F); DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy); + const DataLayout &DL = F.getDataLayout(); for (auto &Operand : OperandsToInstrument) - instrumentMemAccess(Operand, DTU, LI); + instrumentMemAccess(Operand, DTU, LI, DL); DTU.flush(); if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) { diff --git a/llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll b/llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll index dca39fe03fb10..b2eaa31007c35 100644 --- a/llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll +++ b/llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll @@ -1,11 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -filetype asm -o - %s | FileCheck %s -; This shows that when dereferencing a null pointer, HWASan will call -; __hwasan_check_x4294967071_19_fixed_0_short_v2 -; (N.B. 4294967071 == 2**32 - 239 + 14 == 2**32 - X0 + XZR +; This shows that CodeGen for AArch64 will elide the tag check when lowering +; the HWASan memaccess intrinsic for null pointers. ; -; The source was generated from llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll. +; N.B. The HWASan pass will normally omit the memaccess intrinsic if the +; pointer is already statically known to be null. +; +; The source was generated from llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll +; with the memaccess deliberately retained. ; ModuleID = '' source_filename = "" @@ -25,7 +28,6 @@ define void @test_store_to_zeroptr() #0 { ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __hwasan_check_x4294967071_19_fixed_0_short_v2 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov w9, #42 // =0x2a ; CHECK-NEXT: str x9, [x8] diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll b/llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll index a201174df995b..95cf6f1544df0 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -passes=hwasan -S | FileCheck %s ; RUN: opt < %s -passes=hwasan -hwasan-recover=0 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=ABORT-ZERO-BASED-SHADOW -; This shows that HWASan will emit a memaccess check when dereferencing a null +; This shows that HWASan omits the memaccess check when dereferencing a null ; pointer. ; The output is used as the source for llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll. @@ -15,7 +15,6 @@ define void @test_store_to_zeroptr() sanitize_hwaddress { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) ; CHECK-NEXT: [[B:%.*]] = inttoptr i64 0 to ptr -; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[B]], i32 19) ; CHECK-NEXT: store i64 42, ptr [[B]], align 8 ; CHECK-NEXT: ret void ; @@ -24,7 +23,6 @@ define void @test_store_to_zeroptr() sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: entry: ; ABORT-ZERO-BASED-SHADOW-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr null) ; ABORT-ZERO-BASED-SHADOW-NEXT: [[B:%.*]] = inttoptr i64 0 to ptr -; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules.fixedshadow(ptr [[B]], i32 19, i64 0) ; ABORT-ZERO-BASED-SHADOW-NEXT: store i64 42, ptr [[B]], align 8 ; ABORT-ZERO-BASED-SHADOW-NEXT: ret void ; From 7ffb691595a3108f72023ae0051487df25a85fc3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 9 Jan 2025 22:02:29 +0000 Subject: [PATCH 67/79] [VPlan] Remove dead ToRemove (NFC). --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 3e3f5adf73a0f..8f6fb07b1e4f2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -588,7 +588,6 @@ static SmallVector collectUsersRecursively(VPValue *V) { /// vector extracts. static void legalizeAndOptimizeInductions(VPlan &Plan) { using namespace llvm::VPlanPatternMatch; - SmallVector ToRemove; VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi()); From 156e6051630d136b48090c0d1cb79a4457564e9d Mon Sep 17 00:00:00 2001 From: alx32 <103613512+alx32@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:14:13 -0800 Subject: [PATCH 68/79] [lld-macho] Fix branch extension thunk estimation logic (#120529) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch improves the linker’s ability to estimate stub reachability in the `TextOutputSection::estimateStubsInRangeVA` function. It does so by including thunks that have already been placed ahead of the current call site address when calculating the threshold for direct stub calls. Before this fix, the estimation process overlooked existing forward thunks. This could result in some thunks not being inserted where needed. In rare situations, particularly with large and specially arranged codebases, this might lead to branch instructions being out of range, causing linking errors. Although this patch successfully addresses the problem, it is not feasible to create a test for this issue. The specific layout and order of thunk creation required to reproduce the corner case are too complex, making test creation impractical. Example error messages the issue could generate: ``` ld64.lld: error: banana.o:(symbol OUTLINED_FUNCTION_24949_3875): relocation BRANCH26 is out of range: 134547892 is not in [-134217728, 134217727]; references objc_autoreleaseReturnValue ld64.lld: error: main.o:(symbol _main+0xc): relocation BRANCH26 is out of range: 134544132 is not in [-134217728, 134217727]; references objc_release ``` --- lld/MachO/ConcatOutputSection.cpp | 36 +++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp index 6a9301f84a03e..d64816ec695ad 100644 --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -184,15 +184,43 @@ uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const { InputSection *isec = inputs[i]; isecEnd = alignToPowerOf2(isecEnd, isec->align) + isec->getSize(); } - // Estimate the address after which call sites can safely call stubs - // directly rather than through intermediary thunks. + + // Tally up any thunks that have already been placed that have VA higher than + // inputs[callIdx]. First, find the index of the first thunk that is beyond + // the current inputs[callIdx]. + auto itPostcallIdxThunks = + llvm::partition_point(thunks, [isecVA](const ConcatInputSection *t) { + return t->getVA() <= isecVA; + }); + uint64_t existingForwardThunks = thunks.end() - itPostcallIdxThunks; + uint64_t forwardBranchRange = target->forwardBranchRange; assert(isecEnd > forwardBranchRange && "should not run thunk insertion if all code fits in jump range"); assert(isecEnd - isecVA <= forwardBranchRange && "should only finalize sections in jump range"); - uint64_t stubsInRangeVA = isecEnd + maxPotentialThunks * target->thunkSize + - in.stubs->getSize() - forwardBranchRange; + + // Estimate the maximum size of the code, right before the stubs section. + uint64_t maxTextSize = 0; + // Add the size of all the inputs, including the unprocessed ones. + maxTextSize += isecEnd; + + // Add the size of the thunks that have already been created that are ahead of + // inputs[callIdx]. These are already created thunks that will be interleaved + // with inputs[callIdx...end]. + maxTextSize += existingForwardThunks * target->thunkSize; + + // Add the size of the thunks that may be created in the future. Since + // 'maxPotentialThunks' overcounts, this is an estimate of the upper limit. + maxTextSize += maxPotentialThunks * target->thunkSize; + + // Estimated maximum VA of last stub. + uint64_t maxVAOfLastStub = maxTextSize + in.stubs->getSize(); + + // Estimate the address after which call sites can safely call stubs + // directly rather than through intermediary thunks. + uint64_t stubsInRangeVA = maxVAOfLastStub - forwardBranchRange; + log("thunks = " + std::to_string(thunkMap.size()) + ", potential = " + std::to_string(maxPotentialThunks) + ", stubs = " + std::to_string(in.stubs->getSize()) + ", isecVA = " + From 3caa68a021c2f795de3df7f6ad7953556e825fab Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 9 Jan 2025 14:15:31 -0800 Subject: [PATCH 69/79] [libc][test] fix memory leak (#122378) Looks like the smart pointer I removed was being used to free the underlying object. Fixes: #122369 --- libc/test/UnitTest/ExecuteFunctionUnix.cpp | 19 +++++++++++++++---- libc/test/UnitTest/FPExceptMatcher.cpp | 1 + 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/libc/test/UnitTest/ExecuteFunctionUnix.cpp b/libc/test/UnitTest/ExecuteFunctionUnix.cpp index df71738668592..b004e8c089117 100644 --- a/libc/test/UnitTest/ExecuteFunctionUnix.cpp +++ b/libc/test/UnitTest/ExecuteFunctionUnix.cpp @@ -36,18 +36,23 @@ int ProcessStatus::get_fatal_signal() { ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) { int pipe_fds[2]; - if (::pipe(pipe_fds) == -1) + if (::pipe(pipe_fds) == -1) { + ::free(func); return ProcessStatus::error("pipe(2) failed"); + } // Don't copy the buffers into the child process and print twice. ::fflush(stderr); ::fflush(stdout); pid_t pid = ::fork(); - if (pid == -1) + if (pid == -1) { + ::free(func); return ProcessStatus::error("fork(2) failed"); + } if (!pid) { (*func)(); + ::free(func); ::exit(0); } ::close(pipe_fds[1]); @@ -57,11 +62,14 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) { }; // No events requested so this call will only return after the timeout or if // the pipes peer was closed, signaling the process exited. - if (::poll(&poll_fd, 1, timeout_ms) == -1) + if (::poll(&poll_fd, 1, timeout_ms) == -1) { + ::free(func); return ProcessStatus::error("poll(2) failed"); + } // If the pipe wasn't closed by the child yet then timeout has expired. if (!(poll_fd.revents & POLLHUP)) { ::kill(pid, SIGKILL); + ::free(func); return ProcessStatus::timed_out_ps(); } @@ -69,9 +77,12 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) { // Wait on the pid of the subprocess here so it gets collected by the system // and doesn't turn into a zombie. pid_t status = ::waitpid(pid, &wstatus, 0); - if (status == -1) + if (status == -1) { + ::free(func); return ProcessStatus::error("waitpid(2) failed"); + } assert(status == pid); + ::free(func); return {wstatus}; } diff --git a/libc/test/UnitTest/FPExceptMatcher.cpp b/libc/test/UnitTest/FPExceptMatcher.cpp index 889f9f6f44a91..928c3df6ac48a 100644 --- a/libc/test/UnitTest/FPExceptMatcher.cpp +++ b/libc/test/UnitTest/FPExceptMatcher.cpp @@ -44,6 +44,7 @@ FPExceptMatcher::FPExceptMatcher(FunctionCaller *func) { fputil::get_env(&oldEnv); if (sigsetjmp(jumpBuffer, 1) == 0) func->call(); + free(func); // We restore the previous floating point environment after // the call to the function which can potentially raise SIGFPE. fputil::set_env(&oldEnv); From e6d061ad49ce11193bddfff8a7920b798fb7d214 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 9 Jan 2025 14:37:14 -0800 Subject: [PATCH 70/79] [libclang/python] Add python bindings for PrintingPolicy (#120494) This allows changing the way pretty-printed code is formatted. --- clang/bindings/python/clang/cindex.py | 83 +++++++++++++++++++ .../python/tests/cindex/test_cursor.py | 14 ++++ clang/docs/ReleaseNotes.rst | 2 + 3 files changed, 99 insertions(+) diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index b6e0d71c1ae1b..7caf0bbfd722a 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -1770,6 +1770,16 @@ def spelling(self): return self._spelling + def pretty_printed(self, policy): + """ + Pretty print declarations. + Parameters: + policy -- The policy to control the entities being printed. + """ + return _CXString.from_result( + conf.lib.clang_getCursorPrettyPrinted(self, policy) + ) + @property def displayname(self): """ @@ -3699,6 +3709,72 @@ def write_main_file_to_stdout(self): conf.lib.clang_CXRewriter_writeMainFileToStdOut(self) +class PrintingPolicyProperty(BaseEnumeration): + + """ + A PrintingPolicyProperty identifies a property of a PrintingPolicy. + """ + + Indentation = 0 + SuppressSpecifiers = 1 + SuppressTagKeyword = 2 + IncludeTagDefinition = 3 + SuppressScope = 4 + SuppressUnwrittenScope = 5 + SuppressInitializers = 6 + ConstantArraySizeAsWritten = 7 + AnonymousTagLocations = 8 + SuppressStrongLifetime = 9 + SuppressLifetimeQualifiers = 10 + SuppressTemplateArgsInCXXConstructors = 11 + Bool = 12 + Restrict = 13 + Alignof = 14 + UnderscoreAlignof = 15 + UseVoidForZeroParams = 16 + TerseOutput = 17 + PolishForDeclaration = 18 + Half = 19 + MSWChar = 20 + IncludeNewlines = 21 + MSVCFormatting = 22 + ConstantsAsWritten = 23 + SuppressImplicitBase = 24 + FullyQualifiedName = 25 + + +class PrintingPolicy(ClangObject): + """ + The PrintingPolicy is a wrapper class around clang::PrintingPolicy + + It allows specifying how declarations, expressions, and types should be + pretty-printed. + """ + + @staticmethod + def create(cursor): + """ + Creates a new PrintingPolicy + Parameters: + cursor -- Any cursor for a translation unit. + """ + return PrintingPolicy(conf.lib.clang_getCursorPrintingPolicy(cursor)) + + def __init__(self, ptr): + ClangObject.__init__(self, ptr) + + def __del__(self): + conf.lib.clang_PrintingPolicy_dispose(self) + + def get_property(self, property): + """Get a property value for the given printing policy.""" + return conf.lib.clang_PrintingPolicy_getProperty(self, property.value) + + def set_property(self, property, value): + """Set a property value for the given printing policy.""" + conf.lib.clang_PrintingPolicy_setProperty(self, property.value, value) + + # Now comes the plumbing to hook up the C library. # Register callback types @@ -3801,6 +3877,8 @@ def write_main_file_to_stdout(self): ("clang_getCursorExtent", [Cursor], SourceRange), ("clang_getCursorLexicalParent", [Cursor], Cursor), ("clang_getCursorLocation", [Cursor], SourceLocation), + ("clang_getCursorPrettyPrinted", [Cursor, PrintingPolicy], _CXString), + ("clang_getCursorPrintingPolicy", [Cursor], c_object_p), ("clang_getCursorReferenced", [Cursor], Cursor), ("clang_getCursorReferenceNameRange", [Cursor, c_uint, c_uint], SourceRange), ("clang_getCursorResultType", [Cursor], Type), @@ -3924,6 +4002,9 @@ def write_main_file_to_stdout(self): ("clang_Cursor_isAnonymousRecordDecl", [Cursor], bool), ("clang_Cursor_isBitField", [Cursor], bool), ("clang_Location_isInSystemHeader", [SourceLocation], bool), + ("clang_PrintingPolicy_dispose", [PrintingPolicy]), + ("clang_PrintingPolicy_getProperty", [PrintingPolicy, c_int], c_uint), + ("clang_PrintingPolicy_setProperty", [PrintingPolicy, c_int, c_uint]), ("clang_Type_getAlignOf", [Type], c_longlong), ("clang_Type_getClassType", [Type], Type), ("clang_Type_getNumTemplateArguments", [Type], c_int), @@ -4104,6 +4185,8 @@ def function_exists(self, name: str) -> bool: "FixIt", "Index", "LinkageKind", + "PrintingPolicy", + "PrintingPolicyProperty", "RefQualifierKind", "SourceLocation", "SourceRange", diff --git a/clang/bindings/python/tests/cindex/test_cursor.py b/clang/bindings/python/tests/cindex/test_cursor.py index 4d989a7421e79..c6aa65ce3c29f 100644 --- a/clang/bindings/python/tests/cindex/test_cursor.py +++ b/clang/bindings/python/tests/cindex/test_cursor.py @@ -5,6 +5,8 @@ BinaryOperator, Config, CursorKind, + PrintingPolicy, + PrintingPolicyProperty, StorageClass, TemplateArgumentKind, TranslationUnit, @@ -981,3 +983,15 @@ def test_from_result_null(self): def test_from_cursor_result_null(self): tu = get_tu("") self.assertEqual(tu.cursor.semantic_parent, None) + + def test_pretty_print(self): + tu = get_tu("struct X { int x; }; void f(bool x) { }", lang="cpp") + f = get_cursor(tu, "f") + + self.assertEqual(f.displayname, "f(bool)") + pp = PrintingPolicy.create(f) + self.assertEqual(pp.get_property(PrintingPolicyProperty.Bool), True) + self.assertEqual(f.pretty_printed(pp), "void f(bool x) {\n}\n") + pp.set_property(PrintingPolicyProperty.Bool, False) + self.assertEqual(pp.get_property(PrintingPolicyProperty.Bool), False) + self.assertEqual(f.pretty_printed(pp), "void f(_Bool x) {\n}\n") diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5ac886856c539..07a1a4195427d 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1298,6 +1298,8 @@ Sanitizers Python Binding Changes ---------------------- - Fixed an issue that led to crashes when calling ``Type.get_exception_specification_kind``. +- Added bindings for ``clang_getCursorPrettyPrinted`` and related functions, + which allow changing the formatting of pretty-printed code. - Added binding for ``clang_Cursor_isAnonymousRecordDecl``, which allows checking if a declaration is an anonymous union or anonymous struct. From 9426fdd4cbd6812b69c218b865f184cb25342be4 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 9 Jan 2025 14:46:52 -0800 Subject: [PATCH 71/79] [libc][test] fix memory leak pt.2 (#122384) These were created with operator new (see `Test::createCallable`), so operator delete should be used instead of free(). Fixes: #122369 Fixes: #122378 --- libc/test/UnitTest/ExecuteFunctionUnix.cpp | 14 +++++++------- libc/test/UnitTest/FPExceptMatcher.cpp | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libc/test/UnitTest/ExecuteFunctionUnix.cpp b/libc/test/UnitTest/ExecuteFunctionUnix.cpp index b004e8c089117..c90d7e0cb9ff1 100644 --- a/libc/test/UnitTest/ExecuteFunctionUnix.cpp +++ b/libc/test/UnitTest/ExecuteFunctionUnix.cpp @@ -37,7 +37,7 @@ int ProcessStatus::get_fatal_signal() { ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) { int pipe_fds[2]; if (::pipe(pipe_fds) == -1) { - ::free(func); + delete func; return ProcessStatus::error("pipe(2) failed"); } @@ -46,13 +46,13 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) { ::fflush(stdout); pid_t pid = ::fork(); if (pid == -1) { - ::free(func); + delete func; return ProcessStatus::error("fork(2) failed"); } if (!pid) { (*func)(); - ::free(func); + delete func; ::exit(0); } ::close(pipe_fds[1]); @@ -63,13 +63,13 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) { // No events requested so this call will only return after the timeout or if // the pipes peer was closed, signaling the process exited. if (::poll(&poll_fd, 1, timeout_ms) == -1) { - ::free(func); + delete func; return ProcessStatus::error("poll(2) failed"); } // If the pipe wasn't closed by the child yet then timeout has expired. if (!(poll_fd.revents & POLLHUP)) { ::kill(pid, SIGKILL); - ::free(func); + delete func; return ProcessStatus::timed_out_ps(); } @@ -78,11 +78,11 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) { // and doesn't turn into a zombie. pid_t status = ::waitpid(pid, &wstatus, 0); if (status == -1) { - ::free(func); + delete func; return ProcessStatus::error("waitpid(2) failed"); } assert(status == pid); - ::free(func); + delete func; return {wstatus}; } diff --git a/libc/test/UnitTest/FPExceptMatcher.cpp b/libc/test/UnitTest/FPExceptMatcher.cpp index 928c3df6ac48a..119a06985b8f1 100644 --- a/libc/test/UnitTest/FPExceptMatcher.cpp +++ b/libc/test/UnitTest/FPExceptMatcher.cpp @@ -44,7 +44,7 @@ FPExceptMatcher::FPExceptMatcher(FunctionCaller *func) { fputil::get_env(&oldEnv); if (sigsetjmp(jumpBuffer, 1) == 0) func->call(); - free(func); + delete func; // We restore the previous floating point environment after // the call to the function which can potentially raise SIGFPE. fputil::set_env(&oldEnv); From ba704d59569151f1b8b6552ed22a7b840f5e6256 Mon Sep 17 00:00:00 2001 From: alx32 <103613512+alx32@users.noreply.github.com> Date: Thu, 9 Jan 2025 15:01:06 -0800 Subject: [PATCH 72/79] [llvm-gsymutil] Address merged-funcs test non-determinism (#122308) When creating a gSYM, `llvm-gsymutil` operates by default in multi-threaded mode. If merged functions are present, determinism cannot be guaranteed if parallel processing is enabled. So, for our merged functions tests, we disable multi-threading during gSYM creation. --- .../macho-gsym-callsite-info-dsym.yaml | 2 +- .../macho-gsym-callsite-info-exe.yaml | 2 +- .../macho-gsym-callsite-info-obj.test | 2 +- .../macho-gsym-merged-callsites-dsym.yaml | 4 +- .../ARM_AArch64/macho-merged-funcs-dwarf.yaml | 69 +++++++++---------- 5 files changed, 39 insertions(+), 40 deletions(-) diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml index c636afe5cb850..6d0dc7f6f3033 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml @@ -4,7 +4,7 @@ # RUN: split-file %s %t # RUN: yaml2obj %t/call_sites.dSYM.yaml -o %t/call_sites.dSYM -# RUN: llvm-gsymutil --convert=%t/call_sites.dSYM --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym +# RUN: llvm-gsymutil --num-threads=1 --convert=%t/call_sites.dSYM --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym # Dump the GSYM file and check the output for callsite information # RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-GSYM %s diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml index 557f43d778e58..5a2ec2be3c94a 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml @@ -4,7 +4,7 @@ # RUN: split-file %s %t # RUN: yaml2obj %t/call_sites.exe.yaml -o %t/call_sites.exe -# RUN: llvm-gsymutil --convert=%t/call_sites.exe --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_exe.gsym +# RUN: llvm-gsymutil --num-threads=1 --convert=%t/call_sites.exe --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_exe.gsym # Dump the GSYM file and check the output for callsite information # RUN: llvm-gsymutil %t/call_sites_exe.gsym | FileCheck --check-prefix=CHECK-GSYM %s diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test index 2d082ed6477f1..64c18669264ef 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test @@ -2,7 +2,7 @@ // Assemble the input assembly code into an object file // RUN: llc -enable-machine-outliner=never -mtriple arm64-apple-darwin -filetype=obj %t/call_sites.ll -o %t/call_sites.o -// RUN: llvm-gsymutil --convert=%t/call_sites.o --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_obj.gsym +// RUN: llvm-gsymutil --num-threads=1 --convert=%t/call_sites.o --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_obj.gsym // Dump the GSYM file and check the output for callsite information // RUN: llvm-gsymutil %t/call_sites_obj.gsym | FileCheck --check-prefix=CHECK-GSYM %s diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml index 4cecc79c72b4b..5001ffdeab9e2 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml @@ -3,8 +3,8 @@ # RUN: split-file %s %t # RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM -# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym -# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym +# RUN: llvm-gsymutil --num-threads=1 --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym +# RUN: llvm-gsymutil --num-threads=1 --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym # Dump the GSYM file and check the output for callsite information # RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml index 522c576854421..6bf359cbe1ee1 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml @@ -1,11 +1,11 @@ # RUN: yaml2obj %s -o %t.dSYM ## Verify that we don't keep merged functions by default -# RUN: llvm-gsymutil --convert %t.dSYM --out-file=%t.default.gSYM +# RUN: llvm-gsymutil --num-threads=1 --convert %t.dSYM --out-file=%t.default.gSYM # RUN: llvm-gsymutil --verify --verbose %t.default.gSYM | FileCheck --check-prefix=CHECK-GSYM-DEFAULT %s ## Verify that we keep merged functions when specyfing --merged-functions -# RUN: llvm-gsymutil --convert %t.dSYM --out-file=%t.keep.gSYM --merged-functions +# RUN: llvm-gsymutil --num-threads=1 --convert %t.dSYM --out-file=%t.keep.gSYM --merged-functions # RUN: llvm-gsymutil --verify --verbose %t.keep.gSYM | FileCheck --check-prefix=CHECK-GSYM-KEEP %s ## Note: For identical functions, the dSYM / gSYM cannot be counted on to be deterministic. @@ -13,7 +13,7 @@ # CHECK-GSYM-DEFAULT-NOT: Merged FunctionInfos -# CHECK-GSYM-DEFAULT: FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}" +# CHECK-GSYM-DEFAULT: FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_03" # CHECK-GSYM-KEEP: Address Table: @@ -30,52 +30,51 @@ # CHECK-GSYM-KEEP-NEXT: INDEX DIRECTORY BASENAME PATH # CHECK-GSYM-KEEP-NEXT: ====== ========== ========== ============================== # CHECK-GSYM-KEEP-NEXT: [ 0] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} -# CHECK-GSYM-KEEP-NEXT: [ 1] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp -# CHECK-GSYM-KEEP-NEXT: [ 2] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp -# CHECK-GSYM-KEEP-NEXT: [ 3] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp +# CHECK-GSYM-KEEP-NEXT: [ 1] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp +# CHECK-GSYM-KEEP-NEXT: [ 2] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp +# CHECK-GSYM-KEEP-NEXT: [ 3] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp -# CHECK-GSYM-KEEP: FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}" +# CHECK-GSYM-KEEP: FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_01" # CHECK-GSYM-KEEP-NEXT: LineTable: -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:7 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:9 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:8 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:11 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:5 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:7 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:9 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:8 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:11 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:10 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:6 # CHECK-GSYM-KEEP-NEXT: ++ Merged FunctionInfos[0]: -# CHECK-GSYM-KEEP-NEXT: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}" +# CHECK-GSYM-KEEP-NEXT: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_02" # CHECK-GSYM-KEEP-NEXT: LineTable: -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:7 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:9 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:8 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:11 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:5 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:7 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:9 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:8 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:11 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:10 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:6 # CHECK-GSYM-KEEP-NEXT: ++ Merged FunctionInfos[1]: -# CHECK-GSYM-KEEP-NEXT: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}" +# CHECK-GSYM-KEEP-NEXT: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_03" # CHECK-GSYM-KEEP-NEXT: LineTable: -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:7 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:9 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:8 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:11 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10 -# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:5 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:7 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:9 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:8 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:11 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:10 +# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:6 ## Test the lookup functionality for merged functions: # RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 --merged-functions | FileCheck --check-prefix=CHECK-MERGED-LOOKUP %s # RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 | FileCheck --check-prefix=CHECK-NORMAL-LOOKUP %s -#### TODO: Fix non-determinism leading that is currently worked around with `{{[1-3]}}` below. # CHECK-MERGED-LOOKUP: Found 3 functions at address 0x0000000000000248: -# CHECK-MERGED-LOOKUP-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5 -# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5 -# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5 +# CHECK-MERGED-LOOKUP-NEXT: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:5 +# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_02 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:5 +# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_03 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:5 -# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5 +# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:5 --- !mach-o From c1c50c7a3ef9ced4a9b01e0afd83b83d7060fff9 Mon Sep 17 00:00:00 2001 From: Thor Preimesberger <111035711+cheezeburglar@users.noreply.github.com> Date: Thu, 9 Jan 2025 17:20:48 -0600 Subject: [PATCH 73/79] [SDPatternMatch] Add matchers m_ExtractSubvector and m_InsertSubvector (#120212) Fixes #118846 --- llvm/include/llvm/CodeGen/SDPatternMatch.h | 12 ++++++ .../CodeGen/SelectionDAGPatternMatchTest.cpp | 38 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index fc8ef717c74f6..2ccefa33abbf8 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -514,6 +514,12 @@ m_InsertElt(const T0_P &Vec, const T1_P &Val, const T2_P &Idx) { Idx); } +template +inline TernaryOpc_match +m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx) { + return TernaryOpc_match(ISD::INSERT_SUBVECTOR, Base, Sub, Idx); +} + // === Binary operations === template @@ -846,6 +852,12 @@ inline BinaryOpc_match m_ExtractElt(const LHS &Vec, const RHS &Idx) { return BinaryOpc_match(ISD::EXTRACT_VECTOR_ELT, Vec, Idx); } +template +inline BinaryOpc_match m_ExtractSubvector(const LHS &Vec, + const RHS &Idx) { + return BinaryOpc_match(ISD::EXTRACT_SUBVECTOR, Vec, Idx); +} + // === Unary operations === template struct UnaryOpc_match { unsigned Opcode; diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp index a2e1e588d03de..9b759ef19efe1 100644 --- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp @@ -165,9 +165,15 @@ TEST_F(SelectionDAGPatternMatchTest, matchTernaryOp) { SDValue Select = DAG->getSelect(DL, MVT::i1, Cond, T, F); auto VInt32VT = EVT::getVectorVT(Context, Int32VT, 4); + auto SmallVInt32VT = EVT::getVectorVT(Context, Int32VT, 2); + auto Idx0 = DAG->getVectorIdxConstant(0, DL); + auto Idx3 = DAG->getVectorIdxConstant(3, DL); SDValue V1 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 6, VInt32VT); SDValue V2 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 7, VInt32VT); + SDValue V3 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 8, SmallVInt32VT); SDValue VSelect = DAG->getNode(ISD::VSELECT, DL, VInt32VT, Cond, V1, V2); + SDValue InsertSubvector = + DAG->getNode(ISD::INSERT_SUBVECTOR, DL, VInt32VT, V2, V3, Idx0); SDValue ExtractELT = DAG->getNode(ISD::EXTRACT_VECTOR_ELT, DL, Int32VT, V1, Op3); @@ -209,15 +215,33 @@ TEST_F(SelectionDAGPatternMatchTest, matchTernaryOp) { EXPECT_TRUE(sd_match(ExtractELT, m_ExtractElt(m_Value(), m_Value()))); EXPECT_TRUE(sd_match(ExtractELT, m_ExtractElt(m_Value(), m_ConstInt()))); EXPECT_TRUE(sd_match(ExtractELT, m_ExtractElt(m_Value(), m_SpecificInt(1)))); + + EXPECT_TRUE(sd_match(InsertSubvector, + m_InsertSubvector(m_Value(), m_Value(), m_Value()))); + EXPECT_TRUE(sd_match( + InsertSubvector, + m_InsertSubvector(m_Specific(V2), m_Specific(V3), m_Specific(Idx0)))); + EXPECT_TRUE(sd_match( + InsertSubvector, + m_InsertSubvector(m_Specific(V2), m_Specific(V3), m_SpecificInt(0)))); + EXPECT_FALSE(sd_match( + InsertSubvector, + m_InsertSubvector(m_Specific(V2), m_Specific(V3), m_Specific(Idx3)))); + EXPECT_FALSE(sd_match( + InsertSubvector, + m_InsertSubvector(m_Specific(V2), m_Specific(V3), m_SpecificInt(3)))); } TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) { SDLoc DL; auto Int32VT = EVT::getIntegerVT(Context, 32); auto Float32VT = EVT::getFloatingPointVT(32); + auto BigVInt32VT = EVT::getVectorVT(Context, Int32VT, 8); auto VInt32VT = EVT::getVectorVT(Context, Int32VT, 4); SDValue V1 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 6, VInt32VT); + auto Idx0 = DAG->getVectorIdxConstant(0, DL); + auto Idx1 = DAG->getVectorIdxConstant(1, DL); SDValue Op0 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); SDValue Op1 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 2, Int32VT); @@ -260,6 +284,10 @@ TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) { SDValue SFAdd = DAG->getNode(ISD::STRICT_FADD, DL, {Float32VT, MVT::Other}, {DAG->getEntryNode(), Op2, Op2}); + SDValue Vec = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 9, BigVInt32VT); + SDValue SubVec = + DAG->getNode(ISD::EXTRACT_SUBVECTOR, DL, VInt32VT, Vec, Idx0); + SDValue InsertELT = DAG->getNode(ISD::INSERT_VECTOR_ELT, DL, VInt32VT, V1, Op0, Op4); @@ -320,6 +348,16 @@ TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) { EXPECT_FALSE(sd_match(SFAdd, m_ChainedBinOp(ISD::STRICT_FADD, m_OtherVT(), m_SpecificVT(Float32VT)))); + EXPECT_TRUE(sd_match(SubVec, m_ExtractSubvector(m_Value(), m_Value()))); + EXPECT_TRUE( + sd_match(SubVec, m_ExtractSubvector(m_Specific(Vec), m_Specific(Idx0)))); + EXPECT_TRUE( + sd_match(SubVec, m_ExtractSubvector(m_Specific(Vec), m_SpecificInt(0)))); + EXPECT_FALSE( + sd_match(SubVec, m_ExtractSubvector(m_Specific(Vec), m_Specific(Idx1)))); + EXPECT_FALSE( + sd_match(SubVec, m_ExtractSubvector(m_Specific(Vec), m_SpecificInt(1)))); + EXPECT_TRUE( sd_match(InsertELT, m_InsertElt(m_Value(), m_Value(), m_Value()))); EXPECT_TRUE( From 504f6ce0c25b5cd721622cd444e9c428f400fd73 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Fri, 10 Jan 2025 07:35:50 +0800 Subject: [PATCH 74/79] [clang-tidy][NFC] clean readability-use-std-min-max (#122288) 1. add `static` for internal linkage functions 2. remove `clang` prefix for `QualType` --- .../readability/UseStdMinMaxCheck.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp b/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp index 9c5c2f3939c99..179173502a8d0 100644 --- a/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp @@ -59,7 +59,7 @@ static bool maxCondition(const BinaryOperator::Opcode Op, const Expr *CondLhs, return false; } -QualType getNonTemplateAlias(QualType QT) { +static QualType getNonTemplateAlias(QualType QT) { while (true) { // cast to a TypedefType if (const TypedefType *TT = dyn_cast(QT)) { @@ -92,15 +92,15 @@ static std::string createReplacement(const Expr *CondLhs, const Expr *CondRhs, const llvm::StringRef AssignLhsStr = Lexer::getSourceText( Source.getExpansionRange(AssignLhs->getSourceRange()), Source, LO); - clang::QualType GlobalImplicitCastType; - clang::QualType LhsType = CondLhs->getType() - .getCanonicalType() - .getNonReferenceType() - .getUnqualifiedType(); - clang::QualType RhsType = CondRhs->getType() - .getCanonicalType() - .getNonReferenceType() - .getUnqualifiedType(); + QualType GlobalImplicitCastType; + QualType LhsType = CondLhs->getType() + .getCanonicalType() + .getNonReferenceType() + .getUnqualifiedType(); + QualType RhsType = CondRhs->getType() + .getCanonicalType() + .getNonReferenceType() + .getUnqualifiedType(); if (LhsType != RhsType) { GlobalImplicitCastType = getNonTemplateAlias(BO->getLHS()->getType()); } From a6aa9365f75c6f28c3d281c662dcdb6fb5222601 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Thu, 9 Jan 2025 15:57:32 -0800 Subject: [PATCH 75/79] [NFC][AsmPrinter] Pass MJTI by const reference instead of const pointer (#122365) The caller `AsmPrinter::emitJumpTableInfo` checks [1] `MJTI` is not a null pointer before calling `emitJumpTableEntry` or `emitJumpTableSizesSection`. This patch updates callee function's signature to accept const reference, this way it's explicit `MJTI` won't be nullptr inside the callee. [1] https://github.com/llvm/llvm-project/blob/9d5299eb61a64cd4df5fefa0299b0cf8d917978f/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp#L2857 --- llvm/include/llvm/CodeGen/AsmPrinter.h | 4 ++-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index c9a88d7b1c015..bf491096e3c47 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -892,10 +892,10 @@ class AsmPrinter : public MachineFunctionPass { // Internal Implementation Details //===------------------------------------------------------------------===// - void emitJumpTableEntry(const MachineJumpTableInfo *MJTI, + void emitJumpTableEntry(const MachineJumpTableInfo &MJTI, const MachineBasicBlock *MBB, unsigned uid) const; - void emitJumpTableSizesSection(const MachineJumpTableInfo *MJTI, + void emitJumpTableSizesSection(const MachineJumpTableInfo &MJTI, const Function &F) const; void emitLLVMUsedList(const ConstantArray *InitList); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3ba45900e4569..55c1d12a6fa8f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2922,19 +2922,19 @@ void AsmPrinter::emitJumpTableInfo() { // Defer MCAssembler based constant folding due to a performance issue. The // label differences will be evaluated at write time. for (const MachineBasicBlock *MBB : JTBBs) - emitJumpTableEntry(MJTI, MBB, JTI); + emitJumpTableEntry(*MJTI, MBB, JTI); } if (EmitJumpTableSizesSection) - emitJumpTableSizesSection(MJTI, F); + emitJumpTableSizesSection(*MJTI, F); if (!JTInDiffSection) OutStreamer->emitDataRegion(MCDR_DataRegionEnd); } -void AsmPrinter::emitJumpTableSizesSection(const MachineJumpTableInfo *MJTI, +void AsmPrinter::emitJumpTableSizesSection(const MachineJumpTableInfo &MJTI, const Function &F) const { - const std::vector &JT = MJTI->getJumpTables(); + const std::vector &JT = MJTI.getJumpTables(); if (JT.empty()) return; @@ -2982,17 +2982,17 @@ void AsmPrinter::emitJumpTableSizesSection(const MachineJumpTableInfo *MJTI, /// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the /// current stream. -void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI, +void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo &MJTI, const MachineBasicBlock *MBB, unsigned UID) const { assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block"); const MCExpr *Value = nullptr; - switch (MJTI->getEntryKind()) { + switch (MJTI.getEntryKind()) { case MachineJumpTableInfo::EK_Inline: llvm_unreachable("Cannot emit EK_Inline jump table entry"); case MachineJumpTableInfo::EK_Custom32: Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry( - MJTI, MBB, UID, OutContext); + &MJTI, MBB, UID, OutContext); break; case MachineJumpTableInfo::EK_BlockAddress: // EK_BlockAddress - Each entry is a plain address of block, e.g.: @@ -3026,7 +3026,7 @@ void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI, // If the .set directive avoids relocations, this is emitted as: // .set L4_5_set_123, LBB123 - LJTI1_2 // .word L4_5_set_123 - if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && + if (MJTI.getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && MAI->doesSetDirectiveSuppressReloc()) { Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()), OutContext); @@ -3042,7 +3042,7 @@ void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); - unsigned EntrySize = MJTI->getEntrySize(getDataLayout()); + unsigned EntrySize = MJTI.getEntrySize(getDataLayout()); OutStreamer->emitValue(Value, EntrySize); } From 5d88a84ecddab3471693e44b57a1c1f21ce14f3f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 9 Jan 2025 15:46:44 -0800 Subject: [PATCH 76/79] [RISCV] Simplify some RISCVInstrInfoC classes by removing arguments that never change. NFC --- llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 38 +++++++++++------------ llvm/lib/Target/RISCV/RISCVInstrInfoZc.td | 2 +- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index ce994206cd785..4438957b0dc90 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -259,9 +259,8 @@ class CStore_rri funct3, string OpcodeStr, OpcodeStr, "$rs2, ${imm}(${rs1})">; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class Bcz funct3, string OpcodeStr, - RegisterClass cls> - : RVInst16CB funct3, string OpcodeStr> + : RVInst16CB { let isBranch = 1; let isTerminator = 1; @@ -273,9 +272,9 @@ class Bcz funct3, string OpcodeStr, } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class Shift_right funct2, string OpcodeStr, RegisterClass cls, - Operand ImmOpnd> - : RVInst16CB<0b100, 0b01, (outs cls:$rd), (ins cls:$rs1, ImmOpnd:$imm), +class Shift_right funct2, string OpcodeStr> + : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), + (ins GPRC:$rs1, uimmlog2xlennonzero:$imm), OpcodeStr, "$rs1, $imm"> { let Constraints = "$rs1 = $rd"; let Inst{12} = imm{5}; @@ -284,10 +283,9 @@ class Shift_right funct2, string OpcodeStr, RegisterClass cls, } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class CA_ALU funct6, bits<2> funct2, string OpcodeStr, - RegisterClass cls> - : RVInst16CA { +class CA_ALU funct6, bits<2> funct2, string OpcodeStr> + : RVInst16CA { bits<3> rd; let Constraints = "$rd = $rd_wb"; let Inst{9-7} = rd; @@ -465,9 +463,9 @@ def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd), let Inst{6-2} = imm{4-0}; } -def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>, +def C_SRLI : Shift_right<0b00, "c.srli">, Sched<[WriteShiftImm, ReadShiftImm]>; -def C_SRAI : Shift_right<0b01, "c.srai", GPRC, uimmlog2xlennonzero>, +def C_SRAI : Shift_right<0b01, "c.srai">, Sched<[WriteShiftImm, ReadShiftImm]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in @@ -480,19 +478,19 @@ def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), (ins GPRC:$rs1, simm6:$imm let Inst{6-2} = imm{4-0}; } -def C_SUB : CA_ALU<0b100011, 0b00, "c.sub", GPRC>, +def C_SUB : CA_ALU<0b100011, 0b00, "c.sub">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def C_XOR : CA_ALU<0b100011, 0b01, "c.xor", GPRC>, +def C_XOR : CA_ALU<0b100011, 0b01, "c.xor">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def C_OR : CA_ALU<0b100011, 0b10, "c.or" , GPRC>, +def C_OR : CA_ALU<0b100011, 0b10, "c.or">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -def C_AND : CA_ALU<0b100011, 0b11, "c.and", GPRC>, +def C_AND : CA_ALU<0b100011, 0b11, "c.and">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; let Predicates = [HasStdExtCOrZca, IsRV64] in { -def C_SUBW : CA_ALU<0b100111, 0b00, "c.subw", GPRC>, +def C_SUBW : CA_ALU<0b100111, 0b00, "c.subw">, Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; -def C_ADDW : CA_ALU<0b100111, 0b01, "c.addw", GPRC>, +def C_ADDW : CA_ALU<0b100111, 0b01, "c.addw">, Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; } @@ -504,8 +502,8 @@ def C_J : RVInst16CJ<0b101, 0b01, (outs), (ins simm12_lsb0:$offset), let isBarrier=1; } -def C_BEQZ : Bcz<0b110, "c.beqz", GPRC>, Sched<[WriteJmp, ReadJmp]>; -def C_BNEZ : Bcz<0b111, "c.bnez", GPRC>, Sched<[WriteJmp, ReadJmp]>; +def C_BEQZ : Bcz<0b110, "c.beqz">, Sched<[WriteJmp, ReadJmp]>; +def C_BNEZ : Bcz<0b111, "c.bnez">, Sched<[WriteJmp, ReadJmp]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td index bff740a33c1c1..5cc16765d4ae2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td @@ -186,7 +186,7 @@ def C_ZEXT_B : RVZcArith_r<0b11000 , "c.zext.b">, Sched<[WriteIALU, ReadIALU]>; let Predicates = [HasStdExtZcb, HasStdExtZmmul] in -def C_MUL : CA_ALU<0b100111, 0b10, "c.mul", GPRC>, +def C_MUL : CA_ALU<0b100111, 0b10, "c.mul">, Sched<[WriteIMul, ReadIMul, ReadIMul]>; let Predicates = [HasStdExtZcb] in { From 24bf0e4eb63fe5eebf86f937ff41b66be78cf958 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Fri, 10 Jan 2025 01:37:24 +0100 Subject: [PATCH 77/79] [libc++] Disable _LIBCPP_NODEBUG temporarily (#122393) This should be reverted once the crash reported in #118710 has been analyzed. --- libcxx/include/__config | 4 +++- libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index ace6e1cd73e3e..2de4c009b5afd 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1170,7 +1170,9 @@ typedef __char32_t char32_t; # define _LIBCPP_NOESCAPE # endif -# define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]] +// FIXME: Expand this to [[__gnu__::__nodebug__]] again once the testcase reported in +// https://github.com/llvm/llvm-project/pull/118710 has been analyzed +# define _LIBCPP_NODEBUG # if __has_attribute(__standalone_debug__) # define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__)) diff --git a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp index bc7c8ce7ec443..f49f3e3c615ca 100644 --- a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +++ b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp @@ -27,7 +27,7 @@ class LibcxxTestModule : public clang::tidy::ClangTidyModule { check_factories.registerCheck("libcpp-header-exportable-declarations"); check_factories.registerCheck("libcpp-hide-from-abi"); check_factories.registerCheck("libcpp-internal-ftms"); - check_factories.registerCheck("libcpp-nodebug-on-aliases"); + // check_factories.registerCheck("libcpp-nodebug-on-aliases"); check_factories.registerCheck("libcpp-cpp-version-check"); check_factories.registerCheck("libcpp-robust-against-adl"); check_factories.registerCheck("libcpp-uglify-attributes"); From 04e54cc19f13af26e453c06b6a9e2bc3187e52c2 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 19:39:12 -0500 Subject: [PATCH 78/79] [RISCV][VLOPT] Add Vector Single-Width Averaging Add and Subtract to isSupportedInstr (#122351) --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 9 + llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 176 +++++++++++++++++++ 2 files changed, 185 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index ab73b112e6dd8..9870279ad17c7 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -963,6 +963,15 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VMV_V_I: case RISCV::VMV_V_X: case RISCV::VMV_V_V: + // Vector Single-Width Averaging Add and Subtract + case RISCV::VAADDU_VV: + case RISCV::VAADDU_VX: + case RISCV::VAADD_VV: + case RISCV::VAADD_VX: + case RISCV::VASUBU_VV: + case RISCV::VASUBU_VX: + case RISCV::VASUB_VV: + case RISCV::VASUB_VX: // Vector Crypto case RISCV::VWSLL_VI: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index da7917cae8ba6..ce79bd5d5ddcf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3595,3 +3595,179 @@ define @vmerge_vim( %a, % %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %1, iXLen %vl) ret %2 } + +define @vaadd_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vaadd_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vaadd.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vaadd_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vaadd.vv v8, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vaadd.nxv4i32.nxv4i32( poison, %a, %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vaadd_vx( %a, i32 %b, iXLen %vl) { +; NOVLOPT-LABEL: vaadd_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vaadd.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vaadd_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vaadd.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vaadd.nxv4i32.nxv4i32( poison, %a, i32 %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vasub_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vasub_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vasub.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vasub_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vasub.vv v8, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vasub.nxv4i32.nxv4i32( poison, %a, %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vasub_vx( %a, i32 %b, iXLen %vl) { +; NOVLOPT-LABEL: vasub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vasub.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vasub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vasub.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vasub.nxv4i32.nxv4i32( poison, %a, i32 %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vaaddu_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vaaddu_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vaaddu.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vaaddu_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vaaddu.vv v8, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vaaddu.nxv4i32.nxv4i32( poison, %a, %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vaaddu_vx( %a, i32 %b, iXLen %vl) { +; NOVLOPT-LABEL: vaaddu_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vaaddu.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vaaddu_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vaaddu.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vaaddu.nxv4i32.nxv4i32( poison, %a, i32 %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vasubu_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vasubu_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vasubu.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vasubu_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vasubu.vv v8, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vasubu.nxv4i32.nxv4i32( poison, %a, %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vasubu_vx( %a, i32 %b, iXLen %vl) { +; NOVLOPT-LABEL: vasubu_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vasubu.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vasubu_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vasubu.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vasubu.nxv4i32.nxv4i32( poison, %a, i32 %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} From d0373dbe7c8ca8e7cd9c84e1d93c43c81a085f0c Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 19:44:40 -0500 Subject: [PATCH 79/79] [RISCV][VLOPT] Add vadc to isSupportedInstr (#122345) --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 4 ++ llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 60 ++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 9870279ad17c7..9338e0a1c8741 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -948,6 +948,10 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VMERGE_VIM: case RISCV::VMERGE_VVM: case RISCV::VMERGE_VXM: + // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions + case RISCV::VADC_VIM: + case RISCV::VADC_VVM: + case RISCV::VADC_VXM: // Vector Widening Integer Multiply-Add Instructions case RISCV::VWMACCU_VV: case RISCV::VWMACCU_VX: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index ce79bd5d5ddcf..ce94e1c193645 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3596,6 +3596,66 @@ define @vmerge_vim( %a, % ret %2 } +define @vadc_vvm( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vadc_vvm: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vadc.vvm v8, v8, v10, v0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vadc_vvm: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vadc.vvm v8, v8, v10, v0 +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vadc.nxv4i32.nxv4i32( poison, %a, %b, %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vadc_vxm( %a, i32 %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vadc_vxm: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vadc.vxm v8, v8, a0, v0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vadc_vxm: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vadc.vxm v8, v8, a0, v0 +; VLOPT-NEXT: vadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vadc.nxv4i32.i32( poison, %a, i32 %b, %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %1, iXLen %vl) + ret %2 +} + +define @vadc_vim( %a, %c, iXLen %vl) { +; NOVLOPT-LABEL: vadc_vim: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vadc.vim v8, v8, 9, v0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vadc_vim: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vadc.vim v8, v8, 9, v0 +; VLOPT-NEXT: vadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vadc.nxv4i32.i32( poison, %a, i32 9, %c, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %1, iXLen %vl) + ret %2 +} + define @vaadd_vv( %a, %b, iXLen %vl) { ; NOVLOPT-LABEL: vaadd_vv: ; NOVLOPT: # %bb.0: