From a429dfc167258cf023b2e4c20874a228298372e6 Mon Sep 17 00:00:00 2001
From: Nick Sarnie <nick.sarnie@intel.com>
Date: Fri, 10 Jan 2025 00:59:21 +0900
Subject: [PATCH 01/79] [Driver][SPIR-V] Use consistent tools to convert
 between text and binary form (#120266)

Currently we produce SPIR-V text with `spirv-dis` but assemble it with
`llvm-spirv`. The SPIR-V text format is different between the tools so
the assemble fails. Use `spirv-as` for assembly as it uses the same
format.

---------

Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
---
 clang/lib/Driver/ToolChains/SPIRV.cpp | 49 +++++++++++++++++++++++++--
 clang/lib/Driver/ToolChains/SPIRV.h   | 19 +++++++++++
 clang/test/Driver/spirv-toolchain.cl  |  4 +--
 3 files changed, 67 insertions(+), 5 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/SPIRV.cpp b/clang/lib/Driver/ToolChains/SPIRV.cpp
index 659da5c7f25aa..5a7894f5435fc 100644
--- a/clang/lib/Driver/ToolChains/SPIRV.cpp
+++ b/clang/lib/Driver/ToolChains/SPIRV.cpp
@@ -26,8 +26,8 @@ void SPIRV::constructTranslateCommand(Compilation &C, const Tool &T,
   llvm::opt::ArgStringList CmdArgs(Args);
   CmdArgs.push_back(Input.getFilename());
 
-  if (Input.getType() == types::TY_PP_Asm)
-    CmdArgs.push_back("-to-binary");
+  assert(Input.getType() != types::TY_PP_Asm && "Unexpected input type");
+
   if (Output.getType() == types::TY_PP_Asm)
     CmdArgs.push_back("--spirv-tools-dis");
 
@@ -46,6 +46,31 @@ void SPIRV::constructTranslateCommand(Compilation &C, const Tool &T,
                                          Exec, CmdArgs, Input, Output));
 }
 
+void SPIRV::constructAssembleCommand(Compilation &C, const Tool &T,
+                                     const JobAction &JA,
+                                     const InputInfo &Output,
+                                     const InputInfo &Input,
+                                     const llvm::opt::ArgStringList &Args) {
+  llvm::opt::ArgStringList CmdArgs(Args);
+  CmdArgs.push_back(Input.getFilename());
+
+  assert(Input.getType() == types::TY_PP_Asm && "Unexpected input type");
+
+  CmdArgs.append({"-o", Output.getFilename()});
+
+  // Try to find "spirv-as-<LLVM_VERSION_MAJOR>". Otherwise, fall back to
+  // plain "spirv-as".
+  using namespace std::string_literals;
+  auto VersionedTool = "spirv-as-"s + std::to_string(LLVM_VERSION_MAJOR);
+  std::string ExeCand = T.getToolChain().GetProgramPath(VersionedTool.c_str());
+  if (!llvm::sys::fs::can_execute(ExeCand))
+    ExeCand = T.getToolChain().GetProgramPath("spirv-as");
+
+  const char *Exec = C.getArgs().MakeArgString(ExeCand);
+  C.addCommand(std::make_unique<Command>(JA, T, ResponseFileSupport::None(),
+                                         Exec, CmdArgs, Input, Output));
+}
+
 void SPIRV::Translator::ConstructJob(Compilation &C, const JobAction &JA,
                                      const InputInfo &Output,
                                      const InputInfoList &Inputs,
@@ -57,12 +82,29 @@ void SPIRV::Translator::ConstructJob(Compilation &C, const JobAction &JA,
   constructTranslateCommand(C, *this, JA, Output, Inputs[0], {});
 }
 
+void SPIRV::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                    const InputInfo &Output,
+                                    const InputInfoList &Inputs,
+                                    const ArgList &Args,
+                                    const char *AssembleOutput) const {
+  claimNoWarnArgs(Args);
+  if (Inputs.size() != 1)
+    llvm_unreachable("Invalid number of input files.");
+  constructAssembleCommand(C, *this, JA, Output, Inputs[0], {});
+}
+
 clang::driver::Tool *SPIRVToolChain::getTranslator() const {
   if (!Translator)
     Translator = std::make_unique<SPIRV::Translator>(*this);
   return Translator.get();
 }
 
+clang::driver::Tool *SPIRVToolChain::getAssembler() const {
+  if (!Assembler)
+    Assembler = std::make_unique<SPIRV::Assembler>(*this);
+  return Assembler.get();
+}
+
 clang::driver::Tool *SPIRVToolChain::SelectTool(const JobAction &JA) const {
   Action::ActionClass AC = JA.getKind();
   return SPIRVToolChain::getTool(AC);
@@ -73,8 +115,9 @@ clang::driver::Tool *SPIRVToolChain::getTool(Action::ActionClass AC) const {
   default:
     break;
   case Action::BackendJobClass:
-  case Action::AssembleJobClass:
     return SPIRVToolChain::getTranslator();
+  case Action::AssembleJobClass:
+    return SPIRVToolChain::getAssembler();
   }
   return ToolChain::getTool(AC);
 }
diff --git a/clang/lib/Driver/ToolChains/SPIRV.h b/clang/lib/Driver/ToolChains/SPIRV.h
index 415f639bba3ec..44187084e34ec 100644
--- a/clang/lib/Driver/ToolChains/SPIRV.h
+++ b/clang/lib/Driver/ToolChains/SPIRV.h
@@ -22,6 +22,11 @@ void constructTranslateCommand(Compilation &C, const Tool &T,
                                const InputInfo &Input,
                                const llvm::opt::ArgStringList &Args);
 
+void constructAssembleCommand(Compilation &C, const Tool &T,
+                              const JobAction &JA, const InputInfo &Output,
+                              const InputInfo &Input,
+                              const llvm::opt::ArgStringList &Args);
+
 class LLVM_LIBRARY_VISIBILITY Translator : public Tool {
 public:
   Translator(const ToolChain &TC)
@@ -47,6 +52,17 @@ class LLVM_LIBRARY_VISIBILITY Linker final : public Tool {
                     const char *LinkingOutput) const override;
 };
 
+class LLVM_LIBRARY_VISIBILITY Assembler final : public Tool {
+public:
+  Assembler(const ToolChain &TC) : Tool("SPIRV::Assembler", "spirv-as", TC) {}
+  bool hasIntegratedAssembler() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *AssembleOutput) const override;
+};
+
 } // namespace SPIRV
 } // namespace tools
 
@@ -54,6 +70,7 @@ namespace toolchains {
 
 class LLVM_LIBRARY_VISIBILITY SPIRVToolChain : public ToolChain {
   mutable std::unique_ptr<Tool> Translator;
+  mutable std::unique_ptr<Tool> Assembler;
 
 public:
   SPIRVToolChain(const Driver &D, const llvm::Triple &Triple,
@@ -81,6 +98,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRVToolChain : public ToolChain {
 
 private:
   clang::driver::Tool *getTranslator() const;
+  clang::driver::Tool *getAssembler() const;
+
   bool NativeLLVMSupport;
 };
 
diff --git a/clang/test/Driver/spirv-toolchain.cl b/clang/test/Driver/spirv-toolchain.cl
index eff02f809ce83..33c7bc0a63adf 100644
--- a/clang/test/Driver/spirv-toolchain.cl
+++ b/clang/test/Driver/spirv-toolchain.cl
@@ -43,7 +43,7 @@
 // Check assembly input -> object output
 // RUN: %clang -### --target=spirv64 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s
 // RUN: %clang -### --target=spirv32 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s
-// ASM: {{llvm-spirv.*"}} {{".*"}} "-to-binary" "-o" {{".*o"}}
+// ASM: {{spirv-as.*"}} {{".*"}} "-o" {{".*o"}}
 
 //-----------------------------------------------------------------------------
 // Check --save-temps.
@@ -56,7 +56,7 @@
 // TMP-SAME: "-o" [[BC:".*bc"]]
 // TMP-SAME: [[I]]
 // TMP: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" [[S:".*s"]]
-// TMP: {{llvm-spirv.*"}} [[S]] "-to-binary" "-o" {{".*o"}}
+// TMP: {{spirv-as.*"}} [[S]] "-o" {{".*o"}}
 
 //-----------------------------------------------------------------------------
 // Check linking when multiple input files are passed.

From 762f1b17b2815ccdfb4e5cb5412cb6210db92f73 Mon Sep 17 00:00:00 2001
From: Sarah Spall <sarahspall@microsoft.com>
Date: Thu, 9 Jan 2025 08:00:05 -0800
Subject: [PATCH 02/79] [HLSL] Make fast math the default for HLSL (#119820)

Make fast math the default for HLSL
Repair test cases broken by this change.
Closes #108597
---
 clang/include/clang/Driver/Options.td         |  2 +-
 clang/test/CodeGenHLSL/ArrayTemporary.hlsl    |  4 +-
 .../BasicFeatures/OutputArguments.hlsl        |  4 +-
 .../standard_conversion_sequences.hlsl        |  6 +-
 .../CodeGenHLSL/builtins/ScalarSwizzles.hlsl  |  2 +-
 .../StructuredBuffers-methods-lib.hlsl        |  6 +-
 .../StructuredBuffers-methods-ps.hlsl         |  2 +-
 .../CodeGenHLSL/builtins/WaveReadLaneAt.hlsl  | 12 +--
 clang/test/CodeGenHLSL/builtins/abs.hlsl      | 64 ++++++-------
 clang/test/CodeGenHLSL/builtins/acos.hlsl     | 24 ++---
 clang/test/CodeGenHLSL/builtins/asdouble.hlsl |  4 +-
 clang/test/CodeGenHLSL/builtins/asin.hlsl     | 24 ++---
 clang/test/CodeGenHLSL/builtins/atan.hlsl     | 24 ++---
 clang/test/CodeGenHLSL/builtins/atan2.hlsl    | 24 ++---
 clang/test/CodeGenHLSL/builtins/ceil.hlsl     | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/clamp.hlsl    | 72 +++++++-------
 clang/test/CodeGenHLSL/builtins/clip.hlsl     |  8 +-
 clang/test/CodeGenHLSL/builtins/cos.hlsl      | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/cosh.hlsl     | 24 ++---
 clang/test/CodeGenHLSL/builtins/cross.hlsl    | 14 +--
 clang/test/CodeGenHLSL/builtins/degrees.hlsl  | 32 +++----
 .../CodeGenHLSL/builtins/dot-builtin.hlsl     | 16 ++--
 clang/test/CodeGenHLSL/builtins/dot.hlsl      | 26 ++---
 clang/test/CodeGenHLSL/builtins/exp.hlsl      | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/exp2.hlsl     | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/floor.hlsl    | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/fmod.hlsl     | 24 ++---
 clang/test/CodeGenHLSL/builtins/frac.hlsl     | 32 +++----
 clang/test/CodeGenHLSL/builtins/length.hlsl   | 40 ++++----
 .../CodeGenHLSL/builtins/lerp-builtin.hlsl    |  4 +-
 clang/test/CodeGenHLSL/builtins/lerp.hlsl     | 32 +++----
 clang/test/CodeGenHLSL/builtins/log.hlsl      | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/log10.hlsl    | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/log2.hlsl     | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/mad.hlsl      | 32 +++----
 clang/test/CodeGenHLSL/builtins/max.hlsl      | 64 ++++++-------
 clang/test/CodeGenHLSL/builtins/min.hlsl      | 64 ++++++-------
 .../test/CodeGenHLSL/builtins/normalize.hlsl  | 32 +++----
 clang/test/CodeGenHLSL/builtins/pow.hlsl      | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/radians.hlsl  | 32 +++----
 clang/test/CodeGenHLSL/builtins/rcp.hlsl      | 96 +++++++++----------
 clang/test/CodeGenHLSL/builtins/round.hlsl    | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/rsqrt.hlsl    | 32 +++----
 clang/test/CodeGenHLSL/builtins/saturate.hlsl | 32 +++----
 clang/test/CodeGenHLSL/builtins/sin.hlsl      | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/sinh.hlsl     | 24 ++---
 clang/test/CodeGenHLSL/builtins/sqrt.hlsl     | 48 +++++-----
 clang/test/CodeGenHLSL/builtins/step.hlsl     | 32 +++----
 clang/test/CodeGenHLSL/builtins/tan.hlsl      | 24 ++---
 clang/test/CodeGenHLSL/builtins/tanh.hlsl     | 24 ++---
 clang/test/CodeGenHLSL/builtins/trunc.hlsl    | 48 +++++-----
 clang/test/CodeGenHLSL/float3.hlsl            |  2 +-
 clang/test/CodeGenHLSL/this-reference.hlsl    |  2 +-
 53 files changed, 821 insertions(+), 821 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 52823430919de..eb860f73121fd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2285,7 +2285,7 @@ def ffp_exception_behavior_EQ : Joined<["-"], "ffp-exception-behavior=">, Group<
   NormalizedValues<["FPE_Ignore", "FPE_MayTrap", "FPE_Strict"]>,
   MarshallingInfoEnum<LangOpts<"FPExceptionMode">, "FPE_Default">;
 defm fast_math : BoolFOption<"fast-math",
-  LangOpts<"FastMath">, DefaultFalse,
+  LangOpts<"FastMath">, Default<hlsl.KeyPath>,
   PosFlag<SetTrue, [], [ClangOption, CC1Option, FC1Option, FlangOption],
           "Allow aggressive, lossy floating-point optimizations",
           [cl_fast_relaxed_math.KeyPath]>,
diff --git a/clang/test/CodeGenHLSL/ArrayTemporary.hlsl b/clang/test/CodeGenHLSL/ArrayTemporary.hlsl
index 7d77c0aff736c..e5db7eac37a42 100644
--- a/clang/test/CodeGenHLSL/ArrayTemporary.hlsl
+++ b/clang/test/CodeGenHLSL/ArrayTemporary.hlsl
@@ -90,11 +90,11 @@ void template_call(float FA2[2], float FA4[4], int IA3[3]) {
 
 // CHECK: [[Addr:%.*]] = getelementptr inbounds [2 x float], ptr [[FA2]], i32 0, i32 0
 // CHECK: [[Tmp:%.*]] = load float, ptr [[Addr]]
-// CHECK: call void @_Z11template_fnIfEvT_(float noundef [[Tmp]])
+// CHECK: call void @_Z11template_fnIfEvT_(float noundef nofpclass(nan inf) [[Tmp]])
 
 // CHECK: [[Idx0:%.*]] = getelementptr inbounds [2 x float], ptr [[FA2]], i32 0, i32 0
 // CHECK: [[Val0:%.*]] = load float, ptr [[Idx0]]
-// CHECK: [[Sum:%.*]] = fadd float [[Val0]], 5.000000e+00
+// CHECK: [[Sum:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[Val0]], 5.000000e+00
 // CHECK: [[Idx1:%.*]] = getelementptr inbounds [2 x float], ptr [[FA2]], i32 0, i32 1
 // CHECK: store float [[Sum]], ptr [[Idx1]]
 
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index 6afead4f23366..289bbb959e8a4 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -9,7 +9,7 @@
 // CHECK: define void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) {{%.*}})
 void trunc_Param(inout int X) {}
 
-// ALL-LABEL: define noundef float {{.*}}case1
+// ALL-LABEL: define noundef nofpclass(nan inf) float {{.*}}case1
 // CHECK: [[F:%.*]] = alloca float
 // CHECK: [[ArgTmp:%.*]] = alloca i32
 // CHECK: [[FVal:%.*]] = load float, ptr {{.*}}
@@ -197,7 +197,7 @@ export int case7() {
 // CHECK: define void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) {{%.*}})
 void trunc_vec(inout int3 V) {}
 
-// ALL-LABEL: define noundef <3 x float> {{.*}}case8
+// ALL-LABEL: define noundef nofpclass(nan inf) <3 x float> {{.*}}case8
 
 // CHECK: [[V:%.*]] = alloca <3 x float>
 // CHECK: [[Tmp:%.*]] = alloca <3 x i32>
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
index dd7dfd1769703..1665a0260ab05 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
@@ -6,7 +6,7 @@
 // CHECK: store <3 x float> splat (float 1.000000e+00), ptr [[f3]]
 // CHECK: [[vecf3:%.*]] = load <3 x float>, ptr [[f3]]
 // CHECK: [[vecf4:%.*]] = shufflevector <3 x float> [[vecf3]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
-// CHECK: [[vecd4:%.*]] = fpext <4 x float> [[vecf4]] to <4 x double>
+// CHECK: [[vecd4:%.*]] = fpext reassoc nnan ninf nsz arcp afn <4 x float> [[vecf4]] to <4 x double>
 // CHECK: store <4 x double> [[vecd4]], ptr [[d4]]
 void f3_to_d4() {
   vector<float,3> f3 = 1.0;
@@ -30,7 +30,7 @@ void f3_to_f2() {
 // CHECK: [[f2:%.*]] = alloca <2 x float>
 // CHECK: store <4 x double> splat (double 3.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
-// CHECK: [[vecf4:%.*]] = fptrunc <4 x double> [[vecd4]] to <4 x float>
+// CHECK: [[vecf4:%.*]] = fptrunc reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x float>
 // CHECK: [[vecf2:%.*]] = shufflevector <4 x float> [[vecf4]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
 // CHECK: store <2 x float> [[vecf2]], ptr [[f2]]
 void d4_to_f2() {
@@ -108,7 +108,7 @@ void i2_to_b2() {
 // CHECK: [[b2:%.*]] = alloca i8
 // CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
-// CHECK: [[vecb4:%.*]] = fcmp une <4 x double> [[vecd4]], zeroinitializer
+// CHECK: [[vecb4:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <4 x double> [[vecd4]], zeroinitializer
 // CHECK: [[vecd2:%.*]] = shufflevector <4 x i1> [[vecb4]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
 // CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecd2]], <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 // CHECK: [[i8:%.*]] = bitcast <8 x i1> [[vecb8]] to i8
diff --git a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
index 94a95107eea69..97711c9ee25a1 100644
--- a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
@@ -119,7 +119,7 @@ float2 HowManyFloats(float V) {
 // CHECK: store <1 x double> splat (double 1.000000e+00), ptr [[Tmp]], align 8
 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[Tmp]], align 8
 // CHECK: [[vec3:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <3 x i32> zeroinitializer
-// CHECK: [[vec3f:%.*]] = fptrunc <3 x double> [[vec3]] to <3 x float>
+// CHECK: [[vec3f:%.*]] = fptrunc reassoc nnan ninf nsz arcp afn <3 x double> [[vec3]] to <3 x float>
 // CHECK: ret <3 x float> [[vec3f]]
 
 float3 AllRighty() {
diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl
index 383f591f676d9..93aa218f63ecf 100644
--- a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl
@@ -33,7 +33,7 @@ export void TestAppend(float value) {
     ASB.Append(value);
 }
 
-// CHECK: define void @_Z10TestAppendf(float noundef %value)
+// CHECK: define void @_Z10TestAppendf(float noundef nofpclass(nan inf) %value)
 // CHECK-DXIL: %[[VALUE:.*]] = load float, ptr %value.addr, align 4
 // CHECK-DXIL: %[[INDEX:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 1)
 // CHECK-DXIL: %[[RESPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i32 %[[INDEX]])
@@ -43,7 +43,7 @@ export float TestConsume() {
     return CSB.Consume();
 }
 
-// CHECK: define noundef float @_Z11TestConsumev()
+// CHECK: define noundef nofpclass(nan inf) float @_Z11TestConsumev()
 // CHECK-DXIL: %[[INDEX:.*]] = call i32 @llvm.dx.resource.updatecounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %1, i8 -1)
 // CHECK-DXIL: %[[RESPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %0, i32 %[[INDEX]])
 // CHECK-DXIL: %[[VALUE:.*]] = load float, ptr %[[RESPTR]], align 4
@@ -53,7 +53,7 @@ export float TestLoad() {
     return RWSB1.Load(1) + SB1.Load(2);
 }
 
-// CHECK: define noundef float @_Z8TestLoadv()
+// CHECK: define noundef nofpclass(nan inf) float @_Z8TestLoadv()
 // CHECK: %[[PTR1:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i32 %{{[0-9]+}})
 // CHECK: %[[VALUE1:.*]] = load float, ptr %[[PTR1]]
 // CHECK: %[[PTR2:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_0_0t(target("dx.RawBuffer", float, 0, 0) %{{[0-9]+}}, i32 %{{[0-9]+}})
diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl
index 3d9f4f68ec7d2..5b1d8e3052eae 100644
--- a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl
@@ -28,7 +28,7 @@ export float TestLoad() {
     return ROSB1.Load(10);
 }
 
-// CHECK: define noundef float @_Z8TestLoadv()
+// CHECK: define noundef nofpclass(nan inf) float @_Z8TestLoadv()
 // CHECK: %[[PTR1:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %{{[0-9]+}}, i32 %{{[0-9]+}})
 // CHECK: %[[VALUE1:.*]] = load float, ptr %[[PTR1]]
 
diff --git a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl
index 093a199a32bdc..d56a6af26896b 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl
@@ -38,8 +38,8 @@ int16_t test_int16(int16_t expr, uint idx) {
 // CHECK-LABEL: test_half
 half test_half(half expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok2:]] = call token @llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok2]]) ]
-  // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY:.*]] @llvm.spv.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok2]]) ]
+  // CHECK-DXIL:  %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] @llvm.dx.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveReadLaneAt(expr, idx);
 }
@@ -50,8 +50,8 @@ half test_half(half expr, uint idx) {
 // CHECK-LABEL: test_double
 double test_double(double expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok3:]] = call token @llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok3]]) ]
-  // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY:.*]] @llvm.spv.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok3]]) ]
+  // CHECK-DXIL:  %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] @llvm.dx.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveReadLaneAt(expr, idx);
 }
@@ -62,8 +62,8 @@ double test_double(double expr, uint idx) {
 // CHECK-LABEL: test_floatv4
 float4 test_floatv4(float4 expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok4:]] = call token @llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET1:.*]] = call spir_func [[TY1:.*]] @llvm.spv.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok4]]) ]
-  // CHECK-DXIL:  %[[RET1:.*]] = call [[TY1:.*]] @llvm.dx.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]])
+  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok4]]) ]
+  // CHECK-DXIL:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY1]] %[[RET1]]
   return WaveReadLaneAt(expr, idx);
 }
diff --git a/clang/test/CodeGenHLSL/builtins/abs.hlsl b/clang/test/CodeGenHLSL/builtins/abs.hlsl
index 912e8a2834723..a8456a715082e 100644
--- a/clang/test/CodeGenHLSL/builtins/abs.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/abs.hlsl
@@ -22,25 +22,25 @@ int16_t3 test_abs_int16_t3(int16_t3 p0) { return abs(p0); }
 int16_t4 test_abs_int16_t4(int16_t4 p0) { return abs(p0); }
 #endif // __HLSL_ENABLE_16_BIT
 
-// NATIVE_HALF-LABEL: define noundef half @_Z13test_abs_half
-// NATIVE_HALF: call half @llvm.fabs.f16(
-// NO_HALF-LABEL: define noundef float @_Z13test_abs_half
-// NO_HALF: call float @llvm.fabs.f32(float %0)
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_abs_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.fabs.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_abs_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float %0)
 half test_abs_half(half p0) { return abs(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_abs_half2
-// NATIVE_HALF: call <2 x half> @llvm.fabs.v2f16(
-// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_abs_half2
-// NO_HALF: call <2 x float> @llvm.fabs.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_abs_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.fabs.v2f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_abs_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.fabs.v2f32(
 half2 test_abs_half2(half2 p0) { return abs(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_abs_half3
-// NATIVE_HALF: call <3 x half> @llvm.fabs.v3f16(
-// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_abs_half3
-// NO_HALF: call <3 x float> @llvm.fabs.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_abs_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.fabs.v3f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_abs_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.fabs.v3f32(
 half3 test_abs_half3(half3 p0) { return abs(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_abs_half4
-// NATIVE_HALF: call <4 x half> @llvm.fabs.v4f16(
-// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_abs_half4
-// NO_HALF: call <4 x float> @llvm.fabs.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_abs_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.fabs.v4f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_abs_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fabs.v4f32(
 half4 test_abs_half4(half4 p0) { return abs(p0); }
 
 // CHECK-LABEL: define noundef i32 @_Z12test_abs_int
@@ -56,17 +56,17 @@ int3 test_abs_int3(int3 p0) { return abs(p0); }
 // CHECK: call <4 x i32> @llvm.abs.v4i32(
 int4 test_abs_int4(int4 p0) { return abs(p0); }
 
-// CHECK-LABEL: define noundef float @_Z14test_abs_float
-// CHECK: call float @llvm.fabs.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_abs_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(
 float test_abs_float(float p0) { return abs(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z15test_abs_float2
-// CHECK: call <2 x float> @llvm.fabs.v2f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_abs_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.fabs.v2f32(
 float2 test_abs_float2(float2 p0) { return abs(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z15test_abs_float3
-// CHECK: call <3 x float> @llvm.fabs.v3f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_abs_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.fabs.v3f32(
 float3 test_abs_float3(float3 p0) { return abs(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z15test_abs_float4
-// CHECK: call <4 x float> @llvm.fabs.v4f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_abs_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.fabs.v4f32(
 float4 test_abs_float4(float4 p0) { return abs(p0); }
 
 // CHECK-LABEL: define noundef i64 @_Z16test_abs_int64_t
@@ -82,15 +82,15 @@ int64_t3 test_abs_int64_t3(int64_t3 p0) { return abs(p0); }
 // CHECK: call <4 x i64> @llvm.abs.v4i64(
 int64_t4 test_abs_int64_t4(int64_t4 p0) { return abs(p0); }
 
-// CHECK-LABEL: define noundef double @_Z15test_abs_double
-// CHECK: call double @llvm.fabs.f64(
+// CHECK-LABEL: define noundef nofpclass(nan inf) double @_Z15test_abs_double
+// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.fabs.f64(
 double test_abs_double(double p0) { return abs(p0); }
-// CHECK-LABEL: define noundef <2 x double> @_Z16test_abs_double2
-// CHECK: call <2 x double> @llvm.fabs.v2f64(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x double> @_Z16test_abs_double2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.fabs.v2f64(
 double2 test_abs_double2(double2 p0) { return abs(p0); }
-// CHECK-LABEL: define noundef <3 x double> @_Z16test_abs_double3
-// CHECK: call <3 x double> @llvm.fabs.v3f64(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x double> @_Z16test_abs_double3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.fabs.v3f64(
 double3 test_abs_double3(double3 p0) { return abs(p0); }
-// CHECK-LABEL: define noundef <4 x double> @_Z16test_abs_double4
-// CHECK: call <4 x double> @llvm.fabs.v4f64(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x double> @_Z16test_abs_double4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.fabs.v4f64(
 double4 test_abs_double4(double4 p0) { return abs(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/acos.hlsl b/clang/test/CodeGenHLSL/builtins/acos.hlsl
index 78a05cadfcd43..8152339a34e87 100644
--- a/clang/test/CodeGenHLSL/builtins/acos.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/acos.hlsl
@@ -7,53 +7,53 @@
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
 
 // CHECK-LABEL: test_acos_half
-// NATIVE_HALF: call half @llvm.acos.f16
-// NO_HALF: call float @llvm.acos.f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.acos.f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.acos.f32
 half test_acos_half ( half p0 ) {
   return acos ( p0 );
 }
 
 // CHECK-LABEL: test_acos_half2
-// NATIVE_HALF: call <2 x half> @llvm.acos.v2f16
-// NO_HALF: call <2 x float> @llvm.acos.v2f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.acos.v2f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.acos.v2f32
 half2 test_acos_half2 ( half2 p0 ) {
   return acos ( p0 );
 }
 
 // CHECK-LABEL: test_acos_half3
-// NATIVE_HALF: call <3 x half> @llvm.acos.v3f16
-// NO_HALF: call <3 x float> @llvm.acos.v3f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.acos.v3f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.acos.v3f32
 half3 test_acos_half3 ( half3 p0 ) {
   return acos ( p0 );
 }
 
 // CHECK-LABEL: test_acos_half4
-// NATIVE_HALF: call <4 x half> @llvm.acos.v4f16
-// NO_HALF: call <4 x float> @llvm.acos.v4f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.acos.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.acos.v4f32
 half4 test_acos_half4 ( half4 p0 ) {
   return acos ( p0 );
 }
 
 // CHECK-LABEL: test_acos_float
-// CHECK: call float @llvm.acos.f32
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.acos.f32
 float test_acos_float ( float p0 ) {
   return acos ( p0 );
 }
 
 // CHECK-LABEL: test_acos_float2
-// CHECK: call <2 x float> @llvm.acos.v2f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.acos.v2f32
 float2 test_acos_float2 ( float2 p0 ) {
   return acos ( p0 );
 }
 
 // CHECK-LABEL: test_acos_float3
-// CHECK: call <3 x float> @llvm.acos.v3f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.acos.v3f32
 float3 test_acos_float3 ( float3 p0 ) {
   return acos ( p0 );
 }
 
 // CHECK-LABEL: test_acos_float4
-// CHECK: call <4 x float> @llvm.acos.v4f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.acos.v4f32
 float4 test_acos_float4 ( float4 p0 ) {
   return acos ( p0 );
 }
diff --git a/clang/test/CodeGenHLSL/builtins/asdouble.hlsl b/clang/test/CodeGenHLSL/builtins/asdouble.hlsl
index f1c31107cdcad..bfc7fc97854c5 100644
--- a/clang/test/CodeGenHLSL/builtins/asdouble.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/asdouble.hlsl
@@ -18,7 +18,7 @@ double test_uint(uint low, uint high) {
   // CHECK-SPV-SAME: {{.*}} <i32 0, i32 1>
   // CHECK-SPV:      bitcast <2 x i32> %[[SHUFFLE0]] to double
 
-  // CHECK-DXIL: call double @llvm.dx.asdouble.i32
+  // CHECK-DXIL: call reassoc nnan ninf nsz arcp afn double @llvm.dx.asdouble.i32
   return asdouble(low, high);
 }
 
@@ -30,7 +30,7 @@ double3 test_vuint(uint3 low, uint3 high) {
   // CHECK-SPV-SAME: {{.*}} <i32 0, i32 3, i32 1, i32 4, i32 2, i32 5>
   // CHECK-SPV:      bitcast <6 x i32> %[[SHUFFLE1]] to <3 x double>
 
-  // CHECK-DXIL: call <3 x double> @llvm.dx.asdouble.v3i32
+  // CHECK-DXIL: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.dx.asdouble.v3i32
   return asdouble(low, high);
 }
 
diff --git a/clang/test/CodeGenHLSL/builtins/asin.hlsl b/clang/test/CodeGenHLSL/builtins/asin.hlsl
index 1d0b457e336f5..16efbba79670e 100644
--- a/clang/test/CodeGenHLSL/builtins/asin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/asin.hlsl
@@ -7,53 +7,53 @@
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
 
 // CHECK-LABEL: test_asin_half
-// NATIVE_HALF: call half @llvm.asin.f16
-// NO_HALF: call float @llvm.asin.f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.asin.f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.asin.f32
 half test_asin_half ( half p0 ) {
   return asin ( p0 );
 }
 
 // CHECK-LABEL: test_asin_half2
-// NATIVE_HALF: call <2 x half> @llvm.asin.v2f16
-// NO_HALF: call <2 x float> @llvm.asin.v2f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.asin.v2f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.asin.v2f32
 half2 test_asin_half2 ( half2 p0 ) {
   return asin ( p0 );
 }
 
 // CHECK-LABEL: test_asin_half3
-// NATIVE_HALF: call <3 x half> @llvm.asin.v3f16
-// NO_HALF: call <3 x float> @llvm.asin.v3f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.asin.v3f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.asin.v3f32
 half3 test_asin_half3 ( half3 p0 ) {
   return asin ( p0 );
 }
 
 // CHECK-LABEL: test_asin_half4
-// NATIVE_HALF: call <4 x half> @llvm.asin.v4f16
-// NO_HALF: call <4 x float> @llvm.asin.v4f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.asin.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.asin.v4f32
 half4 test_asin_half4 ( half4 p0 ) {
   return asin ( p0 );
 }
 
 // CHECK-LABEL: test_asin_float
-// CHECK: call float @llvm.asin.f32
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.asin.f32
 float test_asin_float ( float p0 ) {
   return asin ( p0 );
 }
 
 // CHECK-LABEL: test_asin_float2
-// CHECK: call <2 x float> @llvm.asin.v2f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.asin.v2f32
 float2 test_asin_float2 ( float2 p0 ) {
   return asin ( p0 );
 }
 
 // CHECK-LABEL: test_asin_float3
-// CHECK: call <3 x float> @llvm.asin.v3f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.asin.v3f32
 float3 test_asin_float3 ( float3 p0 ) {
   return asin ( p0 );
 }
 
 // CHECK-LABEL: test_asin_float4
-// CHECK: call <4 x float> @llvm.asin.v4f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.asin.v4f32
 float4 test_asin_float4 ( float4 p0 ) {
   return asin ( p0 );
 }
diff --git a/clang/test/CodeGenHLSL/builtins/atan.hlsl b/clang/test/CodeGenHLSL/builtins/atan.hlsl
index faee1227f3595..437835a863703 100644
--- a/clang/test/CodeGenHLSL/builtins/atan.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/atan.hlsl
@@ -7,53 +7,53 @@
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
 
 // CHECK-LABEL: test_atan_half
-// NATIVE_HALF: call half @llvm.atan.f16
-// NO_HALF: call float @llvm.atan.f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.atan.f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.atan.f32
 half test_atan_half ( half p0 ) {
   return atan ( p0 );
 }
 
 // CHECK-LABEL: test_atan_half2
-// NATIVE_HALF: call <2 x half> @llvm.atan.v2f16
-// NO_HALF: call <2 x float> @llvm.atan.v2f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.atan.v2f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan.v2f32
 half2 test_atan_half2 ( half2 p0 ) {
   return atan ( p0 );
 }
 
 // CHECK-LABEL: test_atan_half3
-// NATIVE_HALF: call <3 x half> @llvm.atan.v3f16
-// NO_HALF: call <3 x float> @llvm.atan.v3f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.atan.v3f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan.v3f32
 half3 test_atan_half3 ( half3 p0 ) {
   return atan ( p0 );
 }
 
 // CHECK-LABEL: test_atan_half4
-// NATIVE_HALF: call <4 x half> @llvm.atan.v4f16
-// NO_HALF: call <4 x float> @llvm.atan.v4f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.atan.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan.v4f32
 half4 test_atan_half4 ( half4 p0 ) {
   return atan ( p0 );
 }
 
 // CHECK-LABEL: test_atan_float
-// CHECK: call float @llvm.atan.f32
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.atan.f32
 float test_atan_float ( float p0 ) {
   return atan ( p0 );
 }
 
 // CHECK-LABEL: test_atan_float2
-// CHECK: call <2 x float> @llvm.atan.v2f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan.v2f32
 float2 test_atan_float2 ( float2 p0 ) {
   return atan ( p0 );
 }
 
 // CHECK-LABEL: test_atan_float3
-// CHECK: call <3 x float> @llvm.atan.v3f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan.v3f32
 float3 test_atan_float3 ( float3 p0 ) {
   return atan ( p0 );
 }
 
 // CHECK-LABEL: test_atan_float4
-// CHECK: call <4 x float> @llvm.atan.v4f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan.v4f32
 float4 test_atan_float4 ( float4 p0 ) {
   return atan ( p0 );
 }
diff --git a/clang/test/CodeGenHLSL/builtins/atan2.hlsl b/clang/test/CodeGenHLSL/builtins/atan2.hlsl
index 40796052e608f..53d115641e72f 100644
--- a/clang/test/CodeGenHLSL/builtins/atan2.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/atan2.hlsl
@@ -7,53 +7,53 @@
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
 
 // CHECK-LABEL: test_atan2_half
-// NATIVE_HALF: call half @llvm.atan2.f16
-// NO_HALF: call float @llvm.atan2.f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.atan2.f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.atan2.f32
 half test_atan2_half (half p0, half p1) {
   return atan2(p0, p1);
 }
 
 // CHECK-LABEL: test_atan2_half2
-// NATIVE_HALF: call <2 x half> @llvm.atan2.v2f16
-// NO_HALF: call <2 x float> @llvm.atan2.v2f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.atan2.v2f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan2.v2f32
 half2 test_atan2_half2 (half2 p0, half2 p1) {
   return atan2(p0, p1);
 }
 
 // CHECK-LABEL: test_atan2_half3
-// NATIVE_HALF: call <3 x half> @llvm.atan2.v3f16
-// NO_HALF: call <3 x float> @llvm.atan2.v3f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.atan2.v3f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan2.v3f32
 half3 test_atan2_half3 (half3 p0, half3 p1) {
   return atan2(p0, p1);
 }
 
 // CHECK-LABEL: test_atan2_half4
-// NATIVE_HALF: call <4 x half> @llvm.atan2.v4f16
-// NO_HALF: call <4 x float> @llvm.atan2.v4f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.atan2.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan2.v4f32
 half4 test_atan2_half4 (half4 p0, half4 p1) {
   return atan2(p0, p1);
 }
 
 // CHECK-LABEL: test_atan2_float
-// CHECK: call float @llvm.atan2.f32
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.atan2.f32
 float test_atan2_float (float p0, float p1) {
   return atan2(p0, p1);
 }
 
 // CHECK-LABEL: test_atan2_float2
-// CHECK: call <2 x float> @llvm.atan2.v2f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.atan2.v2f32
 float2 test_atan2_float2 (float2 p0, float2 p1) {
   return atan2(p0, p1);
 }
 
 // CHECK-LABEL: test_atan2_float3
-// CHECK: call <3 x float> @llvm.atan2.v3f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.atan2.v3f32
 float3 test_atan2_float3 (float3 p0, float3 p1) {
   return atan2(p0, p1);
 }
 
 // CHECK-LABEL: test_atan2_float4
-// CHECK: call <4 x float> @llvm.atan2.v4f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.atan2.v4f32
 float4 test_atan2_float4 (float4 p0, float4 p1) {
   return atan2(p0, p1);
 }
diff --git a/clang/test/CodeGenHLSL/builtins/ceil.hlsl b/clang/test/CodeGenHLSL/builtins/ceil.hlsl
index 3aa78ec0ebcca..fe0b8f8983838 100644
--- a/clang/test/CodeGenHLSL/builtins/ceil.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/ceil.hlsl
@@ -7,36 +7,36 @@
 
 using hlsl::ceil;
 
-// NATIVE_HALF-LABEL: define noundef half @_Z14test_ceil_half
-// NATIVE_HALF: call half @llvm.ceil.f16(
-// NO_HALF-LABEL: define noundef float @_Z14test_ceil_half
-// NO_HALF: call float @llvm.ceil.f32(float %0)
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z14test_ceil_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.ceil.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z14test_ceil_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.ceil.f32(float %0)
 half test_ceil_half(half p0) { return ceil(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z15test_ceil_half2
-// NATIVE_HALF: call <2 x half> @llvm.ceil.v2f16(
-// NO_HALF-LABEL: define noundef <2 x float> @_Z15test_ceil_half2
-// NO_HALF: call <2 x float> @llvm.ceil.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z15test_ceil_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.ceil.v2f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_ceil_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.ceil.v2f32(
 half2 test_ceil_half2(half2 p0) { return ceil(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z15test_ceil_half3
-// NATIVE_HALF: call <3 x half> @llvm.ceil.v3f16(
-// NO_HALF-LABEL: define noundef <3 x float> @_Z15test_ceil_half3
-// NO_HALF: call <3 x float> @llvm.ceil.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z15test_ceil_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.ceil.v3f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_ceil_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.ceil.v3f32(
 half3 test_ceil_half3(half3 p0) { return ceil(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z15test_ceil_half4
-// NATIVE_HALF: call <4 x half> @llvm.ceil.v4f16(
-// NO_HALF-LABEL: define noundef <4 x float> @_Z15test_ceil_half4
-// NO_HALF: call <4 x float> @llvm.ceil.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z15test_ceil_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.ceil.v4f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_ceil_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.ceil.v4f32(
 half4 test_ceil_half4(half4 p0) { return ceil(p0); }
 
-// CHECK-LABEL: define noundef float @_Z15test_ceil_float
-// CHECK: call float @llvm.ceil.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z15test_ceil_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.ceil.f32(
 float test_ceil_float(float p0) { return ceil(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z16test_ceil_float2
-// CHECK: call <2 x float> @llvm.ceil.v2f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_ceil_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.ceil.v2f32(
 float2 test_ceil_float2(float2 p0) { return ceil(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z16test_ceil_float3
-// CHECK: call <3 x float> @llvm.ceil.v3f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_ceil_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.ceil.v3f32(
 float3 test_ceil_float3(float3 p0) { return ceil(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z16test_ceil_float4
-// CHECK: call <4 x float> @llvm.ceil.v4f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_ceil_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.ceil.v4f32(
 float4 test_ceil_float4(float4 p0) { return ceil(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
index 3489f3d3e2b09..d01c2a45c43c8 100644
--- a/clang/test/CodeGenHLSL/builtins/clamp.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
@@ -1,19 +1,19 @@
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
 // RUN:  -fnative-half-type -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:  -DTARGET=dx -DFNATTRS=noundef
+// RUN:  -DTARGET=dx -DFNATTRS=noundef -DFFNATTRS="nofpclass(nan inf)"
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:  -DTARGET=dx -DFNATTRS=noundef
+// RUN:  -DTARGET=dx -DFNATTRS=noundef -DFFNATTRS="nofpclass(nan inf)"
 // RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute %s \
 // RUN:  -fnative-half-type -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:  -DTARGET=spv -DFNATTRS="spir_func noundef"
+// RUN:  -DTARGET=spv -DFNATTRS="spir_func noundef" -DFFNATTRS="nofpclass(nan inf)"
 // RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute %s \
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:  -DTARGET=spv -DFNATTRS="spir_func noundef"
+// RUN:  -DTARGET=spv -DFNATTRS="spir_func noundef" -DFFNATTRS="nofpclass(nan inf)"
 
 #ifdef __HLSL_ENABLE_16_BIT
 // NATIVE_HALF: define [[FNATTRS]] i16 @_Z16test_clamp_short
@@ -95,49 +95,49 @@ uint64_t3 test_clamp_ulong3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1
 // CHECK: call <4 x i64> @llvm.[[TARGET]].uclamp.v4i64
 uint64_t4 test_clamp_ulong4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); }
 
-// NATIVE_HALF: define [[FNATTRS]] half @_Z15test_clamp_half
-// NATIVE_HALF: call half @llvm.[[TARGET]].nclamp.f16(
-// NO_HALF: define [[FNATTRS]] float @_Z15test_clamp_half
-// NO_HALF: call float @llvm.[[TARGET]].nclamp.f32(
+// NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] half @_Z15test_clamp_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].nclamp.f16(
+// NO_HALF: define [[FNATTRS]] [[FFNATTRS]] float @_Z15test_clamp_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].nclamp.f32(
 half test_clamp_half(half p0, half p1) { return clamp(p0, p1,p1); }
-// NATIVE_HALF: define [[FNATTRS]] <2 x half> @_Z16test_clamp_half2
-// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].nclamp.v2f16
-// NO_HALF: define [[FNATTRS]] <2 x float> @_Z16test_clamp_half2
-// NO_HALF: call <2 x float> @llvm.[[TARGET]].nclamp.v2f32(
+// NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] <2 x half> @_Z16test_clamp_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].nclamp.v2f16
+// NO_HALF: define [[FNATTRS]] [[FFNATTRS]] <2 x float> @_Z16test_clamp_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].nclamp.v2f32(
 half2 test_clamp_half2(half2 p0, half2 p1) { return clamp(p0, p1,p1); }
-// NATIVE_HALF: define [[FNATTRS]] <3 x half> @_Z16test_clamp_half3
-// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].nclamp.v3f16
-// NO_HALF: define [[FNATTRS]] <3 x float> @_Z16test_clamp_half3
-// NO_HALF: call <3 x float> @llvm.[[TARGET]].nclamp.v3f32(
+// NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] <3 x half> @_Z16test_clamp_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].nclamp.v3f16
+// NO_HALF: define [[FNATTRS]] [[FFNATTRS]] <3 x float> @_Z16test_clamp_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].nclamp.v3f32(
 half3 test_clamp_half3(half3 p0, half3 p1) { return clamp(p0, p1,p1); }
-// NATIVE_HALF: define [[FNATTRS]] <4 x half> @_Z16test_clamp_half4
-// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].nclamp.v4f16
-// NO_HALF: define [[FNATTRS]] <4 x float> @_Z16test_clamp_half4
-// NO_HALF: call <4 x float> @llvm.[[TARGET]].nclamp.v4f32(
+// NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] <4 x half> @_Z16test_clamp_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].nclamp.v4f16
+// NO_HALF: define [[FNATTRS]] [[FFNATTRS]] <4 x float> @_Z16test_clamp_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].nclamp.v4f32(
 half4 test_clamp_half4(half4 p0, half4 p1) { return clamp(p0, p1,p1); }
 
-// CHECK: define [[FNATTRS]] float @_Z16test_clamp_float
-// CHECK: call float @llvm.[[TARGET]].nclamp.f32(
+// CHECK: define [[FNATTRS]] [[FFNATTRS]] float @_Z16test_clamp_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].nclamp.f32(
 float test_clamp_float(float p0, float p1) { return clamp(p0, p1,p1); }
-// CHECK: define [[FNATTRS]] <2 x float> @_Z17test_clamp_float2
-// CHECK: call <2 x float> @llvm.[[TARGET]].nclamp.v2f32
+// CHECK: define [[FNATTRS]] [[FFNATTRS]] <2 x float> @_Z17test_clamp_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].nclamp.v2f32
 float2 test_clamp_float2(float2 p0, float2 p1) { return clamp(p0, p1,p1); }
-// CHECK: define [[FNATTRS]] <3 x float> @_Z17test_clamp_float3
-// CHECK: call <3 x float> @llvm.[[TARGET]].nclamp.v3f32
+// CHECK: define [[FNATTRS]] [[FFNATTRS]] <3 x float> @_Z17test_clamp_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].nclamp.v3f32
 float3 test_clamp_float3(float3 p0, float3 p1) { return clamp(p0, p1,p1); }
-// CHECK: define [[FNATTRS]] <4 x float> @_Z17test_clamp_float4
-// CHECK: call <4 x float> @llvm.[[TARGET]].nclamp.v4f32
+// CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x float> @_Z17test_clamp_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].nclamp.v4f32
 float4 test_clamp_float4(float4 p0, float4 p1) { return clamp(p0, p1,p1); }
 
-// CHECK: define [[FNATTRS]] double @_Z17test_clamp_double
-// CHECK: call double @llvm.[[TARGET]].nclamp.f64(
+// CHECK: define [[FNATTRS]] [[FFNATTRS]] double @_Z17test_clamp_double
+// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.[[TARGET]].nclamp.f64(
 double test_clamp_double(double p0, double p1) { return clamp(p0, p1,p1); }
-// CHECK: define [[FNATTRS]] <2 x double> @_Z18test_clamp_double2
-// CHECK: call <2 x double> @llvm.[[TARGET]].nclamp.v2f64
+// CHECK: define [[FNATTRS]] [[FFNATTRS]] <2 x double> @_Z18test_clamp_double2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.[[TARGET]].nclamp.v2f64
 double2 test_clamp_double2(double2 p0, double2 p1) { return clamp(p0, p1,p1); }
-// CHECK: define [[FNATTRS]] <3 x double> @_Z18test_clamp_double3
-// CHECK: call <3 x double> @llvm.[[TARGET]].nclamp.v3f64
+// CHECK: define [[FNATTRS]] [[FFNATTRS]] <3 x double> @_Z18test_clamp_double3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.[[TARGET]].nclamp.v3f64
 double3 test_clamp_double3(double3 p0, double3 p1) { return clamp(p0, p1,p1); }
-// CHECK: define [[FNATTRS]] <4 x double> @_Z18test_clamp_double4
-// CHECK: call <4 x double> @llvm.[[TARGET]].nclamp.v4f64
+// CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x double> @_Z18test_clamp_double4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64
 double4 test_clamp_double4(double4 p0, double4 p1) { return clamp(p0, p1,p1); }
diff --git a/clang/test/CodeGenHLSL/builtins/clip.hlsl b/clang/test/CodeGenHLSL/builtins/clip.hlsl
index 63843c151e3ac..5a1753766a8a1 100644
--- a/clang/test/CodeGenHLSL/builtins/clip.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/clip.hlsl
@@ -5,13 +5,13 @@
 void test_scalar(float Buf) {
   // CHECK:      define void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]])
   // CHECK:      [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4
-  // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00
+  // CHECK-NEXT: [[FCMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt float [[LOAD]], 0.000000e+00
   // CHECK-NO:   call i1 @llvm.dx.any
   // CHECK-NEXT: call void @llvm.dx.discard(i1 [[FCMP]])
   //
   // SPIRV:      define spir_func void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]])
   // SPIRV:      [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4
-  // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00
+  // SPIRV-NEXT: [[FCMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt float [[LOAD]], 0.000000e+00
   // SPIRV-NO:   call i1 @llvm.spv.any
   // SPIRV-NEXT: br i1 [[FCMP]], label %[[LTL:.*]], label %[[ENDL:.*]]
   // SPIRV:      [[LTL]]: ; preds = %entry
@@ -23,13 +23,13 @@ void test_scalar(float Buf) {
 void test_vector4(float4 Buf) {
   // CHECK:      define void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]])
   // CHECK:      [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16
-  // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer
+  // CHECK-NEXT: [[FCMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt <4 x float> [[LOAD]], zeroinitializer
   // CHECK-NEXT: [[ANYC:%.*]] = call i1 @llvm.dx.any.v4i1(<4 x i1> [[FCMP]])
   // CHECK-NEXT: call void @llvm.dx.discard(i1 [[ANYC]])
   //
   // SPIRV:      define spir_func void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]])
   // SPIRV:      [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16
-  // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer
+  // SPIRV-NEXT: [[FCMP:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt <4 x float> [[LOAD]], zeroinitializer
   // SPIRV-NEXT: [[ANYC:%.*]] = call i1 @llvm.spv.any.v4i1(<4 x i1> [[FCMP]]) 
   // SPIRV-NEXT: br i1 [[ANYC]], label %[[LTL:.*]], label %[[ENDL:.*]]
   // SPIRV:      [[LTL]]: ; preds = %entry
diff --git a/clang/test/CodeGenHLSL/builtins/cos.hlsl b/clang/test/CodeGenHLSL/builtins/cos.hlsl
index 4a41a9ec4a7ca..5f993d50498bf 100644
--- a/clang/test/CodeGenHLSL/builtins/cos.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/cos.hlsl
@@ -5,36 +5,36 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z13test_cos_half
-// NATIVE_HALF: call half @llvm.cos.f16(
-// NO_HALF-LABEL: define noundef float @_Z13test_cos_half
-// NO_HALF: call float @llvm.cos.f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_cos_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.cos.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_cos_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.cos.f32(
 half test_cos_half(half p0) { return cos(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_cos_half2
-// NATIVE_HALF: call <2 x half> @llvm.cos.v2f16
-// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_cos_half2
-// NO_HALF: call <2 x float> @llvm.cos.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_cos_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.cos.v2f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_cos_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.cos.v2f32(
 half2 test_cos_half2(half2 p0) { return cos(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_cos_half3
-// NATIVE_HALF: call <3 x half> @llvm.cos.v3f16
-// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_cos_half3
-// NO_HALF: call <3 x float> @llvm.cos.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_cos_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.cos.v3f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_cos_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.cos.v3f32(
 half3 test_cos_half3(half3 p0) { return cos(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_cos_half4
-// NATIVE_HALF: call <4 x half> @llvm.cos.v4f16
-// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_cos_half4
-// NO_HALF: call <4 x float> @llvm.cos.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_cos_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.cos.v4f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_cos_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.cos.v4f32(
 half4 test_cos_half4(half4 p0) { return cos(p0); }
 
-// CHECK-LABEL: define noundef float @_Z14test_cos_float
-// CHECK: call float @llvm.cos.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_cos_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.cos.f32(
 float test_cos_float(float p0) { return cos(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z15test_cos_float2
-// CHECK: call <2 x float> @llvm.cos.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_cos_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.cos.v2f32
 float2 test_cos_float2(float2 p0) { return cos(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z15test_cos_float3
-// CHECK: call <3 x float> @llvm.cos.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_cos_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.cos.v3f32
 float3 test_cos_float3(float3 p0) { return cos(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z15test_cos_float4
-// CHECK: call <4 x float> @llvm.cos.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_cos_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.cos.v4f32
 float4 test_cos_float4(float4 p0) { return cos(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/cosh.hlsl b/clang/test/CodeGenHLSL/builtins/cosh.hlsl
index a19240497b831..07c64206412db 100644
--- a/clang/test/CodeGenHLSL/builtins/cosh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/cosh.hlsl
@@ -7,53 +7,53 @@
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
 
 // CHECK-LABEL: test_cosh_half
-// NATIVE_HALF: call half @llvm.cosh.f16
-// NO_HALF: call float @llvm.cosh.f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.cosh.f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.cosh.f32
 half test_cosh_half ( half p0 ) {
   return cosh ( p0 );
 }
 
 // CHECK-LABEL: test_cosh_half2
-// NATIVE_HALF: call <2 x half> @llvm.cosh.v2f16
-// NO_HALF: call <2 x float> @llvm.cosh.v2f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.cosh.v2f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.cosh.v2f32
 half2 test_cosh_half2 ( half2 p0 ) {
   return cosh ( p0 );
 }
 
 // CHECK-LABEL: test_cosh_half3
-// NATIVE_HALF: call <3 x half> @llvm.cosh.v3f16
-// NO_HALF: call <3 x float> @llvm.cosh.v3f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.cosh.v3f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.cosh.v3f32
 half3 test_cosh_half3 ( half3 p0 ) {
   return cosh ( p0 );
 }
 
 // CHECK-LABEL: test_cosh_half4
-// NATIVE_HALF: call <4 x half> @llvm.cosh.v4f16
-// NO_HALF: call <4 x float> @llvm.cosh.v4f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.cosh.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.cosh.v4f32
 half4 test_cosh_half4 ( half4 p0 ) {
   return cosh ( p0 );
 }
 
 // CHECK-LABEL: test_cosh_float
-// CHECK: call float @llvm.cosh.f32
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.cosh.f32
 float test_cosh_float ( float p0 ) {
   return cosh ( p0 );
 }
 
 // CHECK-LABEL: test_cosh_float2
-// CHECK: call <2 x float> @llvm.cosh.v2f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.cosh.v2f32
 float2 test_cosh_float2 ( float2 p0 ) {
   return cosh ( p0 );
 }
 
 // CHECK-LABEL: test_cosh_float3
-// CHECK: call <3 x float> @llvm.cosh.v3f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.cosh.v3f32
 float3 test_cosh_float3 ( float3 p0 ) {
   return cosh ( p0 );
 }
 
 // CHECK-LABEL: test_cosh_float4
-// CHECK: call <4 x float> @llvm.cosh.v4f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.cosh.v4f32
 float4 test_cosh_float4 ( float4 p0 ) {
   return cosh ( p0 );
 }
diff --git a/clang/test/CodeGenHLSL/builtins/cross.hlsl b/clang/test/CodeGenHLSL/builtins/cross.hlsl
index 514e57d36b201..b2a1d6316787d 100644
--- a/clang/test/CodeGenHLSL/builtins/cross.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/cross.hlsl
@@ -2,26 +2,26 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 
 // NATIVE_HALF: define [[FNATTRS]] <3 x half> @
-// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].cross.v3f16(<3 x half>
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].cross.v3f16(<3 x half>
 // NATIVE_HALF: ret <3 x half> %hlsl.cross
 // NO_HALF: define [[FNATTRS]] <3 x float> @
-// NO_HALF: call <3 x float> @llvm.[[TARGET]].cross.v3f32(<3 x float>
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].cross.v3f32(<3 x float>
 // NO_HALF: ret <3 x float> %hlsl.cross
 half3 test_cross_half3(half3 p0, half3 p1)
 {
@@ -29,7 +29,7 @@ half3 test_cross_half3(half3 p0, half3 p1)
 }
 
 // CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %hlsl.cross = call <3 x float> @llvm.[[TARGET]].cross.v3f32(
+// CHECK: %hlsl.cross = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].cross.v3f32(
 // CHECK: ret <3 x float> %hlsl.cross
 float3 test_cross_float3(float3 p0, float3 p1)
 {
diff --git a/clang/test/CodeGenHLSL/builtins/degrees.hlsl b/clang/test/CodeGenHLSL/builtins/degrees.hlsl
index 9e131f4badc19..64531dd2785eb 100644
--- a/clang/test/CodeGenHLSL/builtins/degrees.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/degrees.hlsl
@@ -2,63 +2,63 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 // RUN: %clang_cc1 -finclude-default-header -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 
 // NATIVE_HALF: define [[FNATTRS]] half @
-// NATIVE_HALF: %hlsl.degrees = call half @llvm.[[TARGET]].degrees.f16(
+// NATIVE_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].degrees.f16(
 // NATIVE_HALF: ret half %hlsl.degrees
 // NO_HALF: define [[FNATTRS]] float @
-// NO_HALF: %hlsl.degrees = call float @llvm.[[TARGET]].degrees.f32(
+// NO_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].degrees.f32(
 // NO_HALF: ret float %hlsl.degrees
 half test_degrees_half(half p0) { return degrees(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <2 x half> @
-// NATIVE_HALF: %hlsl.degrees = call <2 x half> @llvm.[[TARGET]].degrees.v2f16
+// NATIVE_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].degrees.v2f16
 // NATIVE_HALF: ret <2 x half> %hlsl.degrees
 // NO_HALF: define [[FNATTRS]] <2 x float> @
-// NO_HALF: %hlsl.degrees = call <2 x float> @llvm.[[TARGET]].degrees.v2f32(
+// NO_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].degrees.v2f32(
 // NO_HALF: ret <2 x float> %hlsl.degrees
 half2 test_degrees_half2(half2 p0) { return degrees(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <3 x half> @
-// NATIVE_HALF: %hlsl.degrees = call <3 x half> @llvm.[[TARGET]].degrees.v3f16
+// NATIVE_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].degrees.v3f16
 // NATIVE_HALF: ret <3 x half> %hlsl.degrees
 // NO_HALF: define [[FNATTRS]] <3 x float> @
-// NO_HALF: %hlsl.degrees = call <3 x float> @llvm.[[TARGET]].degrees.v3f32(
+// NO_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].degrees.v3f32(
 // NO_HALF: ret <3 x float> %hlsl.degrees
 half3 test_degrees_half3(half3 p0) { return degrees(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <4 x half> @
-// NATIVE_HALF: %hlsl.degrees = call <4 x half> @llvm.[[TARGET]].degrees.v4f16
+// NATIVE_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].degrees.v4f16
 // NATIVE_HALF: ret <4 x half> %hlsl.degrees
 // NO_HALF: define [[FNATTRS]] <4 x float> @
-// NO_HALF: %hlsl.degrees = call <4 x float> @llvm.[[TARGET]].degrees.v4f32(
+// NO_HALF: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].degrees.v4f32(
 // NO_HALF: ret <4 x float> %hlsl.degrees
 half4 test_degrees_half4(half4 p0) { return degrees(p0); }
 
 // CHECK: define [[FNATTRS]] float @
-// CHECK: %hlsl.degrees = call float @llvm.[[TARGET]].degrees.f32(
+// CHECK: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].degrees.f32(
 // CHECK: ret float %hlsl.degrees
 float test_degrees_float(float p0) { return degrees(p0); }
 // CHECK: define [[FNATTRS]] <2 x float> @
-// CHECK: %hlsl.degrees = call <2 x float> @llvm.[[TARGET]].degrees.v2f32
+// CHECK: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].degrees.v2f32
 // CHECK: ret <2 x float> %hlsl.degrees
 float2 test_degrees_float2(float2 p0) { return degrees(p0); }
 // CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %hlsl.degrees = call <3 x float> @llvm.[[TARGET]].degrees.v3f32
+// CHECK: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].degrees.v3f32
 // CHECK: ret <3 x float> %hlsl.degrees
 float3 test_degrees_float3(float3 p0) { return degrees(p0); }
 // CHECK: define [[FNATTRS]] <4 x float> @
-// CHECK: %hlsl.degrees = call <4 x float> @llvm.[[TARGET]].degrees.v4f32
+// CHECK: %hlsl.degrees = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].degrees.v4f32
 // CHECK: ret <4 x float> %hlsl.degrees
 float4 test_degrees_float4(float4 p0) { return degrees(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
index 482f089d4770f..4045169d6516c 100644
--- a/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
@@ -2,8 +2,8 @@
 
 // CHECK-LABEL: builtin_bool_to_float_type_promotion
 // CHECK: %conv1 = uitofp i1 %loadedv to double
-// CHECK: %hlsl.dot = fmul double %conv, %conv1
-// CHECK: %conv2 = fptrunc double %hlsl.dot to float
+// CHECK: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn double %conv, %conv1
+// CHECK: %conv2 = fptrunc reassoc nnan ninf nsz arcp afn double %hlsl.dot to float
 // CHECK: ret float %conv2
 float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
   return __builtin_hlsl_dot ( p0, p1 );
@@ -11,19 +11,19 @@ float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
 
 // CHECK-LABEL: builtin_bool_to_float_arg1_type_promotion
 // CHECK: %conv = uitofp i1 %loadedv to double
-// CHECK: %conv1 = fpext float %1 to double
-// CHECK: %hlsl.dot = fmul double %conv, %conv1
-// CHECK: %conv2 = fptrunc double %hlsl.dot to float
+// CHECK: %conv1 = fpext reassoc nnan ninf nsz arcp afn float %1 to double
+// CHECK: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn double %conv, %conv1
+// CHECK: %conv2 = fptrunc reassoc nnan ninf nsz arcp afn double %hlsl.dot to float
 // CHECK: ret float %conv2
 float builtin_bool_to_float_arg1_type_promotion ( bool p0, float p1 ) {
   return __builtin_hlsl_dot ( p0, p1 );
 }
 
 // CHECK-LABEL: builtin_dot_int_to_float_promotion
-// CHECK: %conv = fpext float %0 to double
+// CHECK: %conv = fpext reassoc nnan ninf nsz arcp afn float %0 to double
 // CHECK: %conv1 = sitofp i32 %1 to double
-// CHECK: dot = fmul double %conv, %conv1
-// CHECK: %conv2 = fptrunc double %hlsl.dot to float
+// CHECK: dot = fmul reassoc nnan ninf nsz arcp afn double %conv, %conv1
+// CHECK: %conv2 = fptrunc reassoc nnan ninf nsz arcp afn double %hlsl.dot to float
 // CHECK: ret float %conv2
 float builtin_dot_int_to_float_promotion ( float p0, int p1 ) {
   return __builtin_hlsl_dot ( p0, p1 );
diff --git a/clang/test/CodeGenHLSL/builtins/dot.hlsl b/clang/test/CodeGenHLSL/builtins/dot.hlsl
index 3f6be04a595e2..7064066780da6 100644
--- a/clang/test/CodeGenHLSL/builtins/dot.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot.hlsl
@@ -115,46 +115,46 @@ uint16_t test_dot_ushort3(uint16_t3 p0, uint16_t3 p1) { return dot(p0, p1); }
 uint16_t test_dot_ushort4(uint16_t4 p0, uint16_t4 p1) { return dot(p0, p1); }
 #endif
 
-// NATIVE_HALF: %hlsl.dot = fmul half
+// NATIVE_HALF: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn half
 // NATIVE_HALF: ret half %hlsl.dot
-// NO_HALF: %hlsl.dot = fmul float
+// NO_HALF: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn float
 // NO_HALF: ret float %hlsl.dot
 half test_dot_half(half p0, half p1) { return dot(p0, p1); }
 
-// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v2f16(<2 x half>
+// NATIVE_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn half @llvm.[[ICF]].fdot.v2f16(<2 x half>
 // NATIVE_HALF: ret half %hlsl.dot
-// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float>
+// NO_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v2f32(<2 x float>
 // NO_HALF: ret float %hlsl.dot
 half test_dot_half2(half2 p0, half2 p1) { return dot(p0, p1); }
 
-// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v3f16(<3 x half>
+// NATIVE_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn half @llvm.[[ICF]].fdot.v3f16(<3 x half>
 // NATIVE_HALF: ret half %hlsl.dot
-// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float>
+// NO_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v3f32(<3 x float>
 // NO_HALF: ret float %hlsl.dot
 half test_dot_half3(half3 p0, half3 p1) { return dot(p0, p1); }
 
-// NATIVE_HALF: %hlsl.dot = call half @llvm.[[ICF]].fdot.v4f16(<4 x half>
+// NATIVE_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn half @llvm.[[ICF]].fdot.v4f16(<4 x half>
 // NATIVE_HALF: ret half %hlsl.dot
-// NO_HALF: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float>
+// NO_HALF: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v4f32(<4 x float>
 // NO_HALF: ret float %hlsl.dot
 half test_dot_half4(half4 p0, half4 p1) { return dot(p0, p1); }
 
-// CHECK: %hlsl.dot = fmul float
+// CHECK: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn float
 // CHECK: ret float %hlsl.dot
 float test_dot_float(float p0, float p1) { return dot(p0, p1); }
 
-// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v2f32(<2 x float>
+// CHECK: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v2f32(<2 x float>
 // CHECK: ret float %hlsl.dot
 float test_dot_float2(float2 p0, float2 p1) { return dot(p0, p1); }
 
-// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v3f32(<3 x float>
+// CHECK: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v3f32(<3 x float>
 // CHECK: ret float %hlsl.dot
 float test_dot_float3(float3 p0, float3 p1) { return dot(p0, p1); }
 
-// CHECK: %hlsl.dot = call float @llvm.[[ICF]].fdot.v4f32(<4 x float>
+// CHECK: %hlsl.dot = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].fdot.v4f32(<4 x float>
 // CHECK: ret float %hlsl.dot
 float test_dot_float4(float4 p0, float4 p1) { return dot(p0, p1); }
 
-// CHECK: %hlsl.dot = fmul double
+// CHECK: %hlsl.dot = fmul reassoc nnan ninf nsz arcp afn double
 // CHECK: ret double %hlsl.dot
 double test_dot_double(double p0, double p1) { return dot(p0, p1); }
diff --git a/clang/test/CodeGenHLSL/builtins/exp.hlsl b/clang/test/CodeGenHLSL/builtins/exp.hlsl
index 3445cfd2e71f6..6ed40ed8f433c 100644
--- a/clang/test/CodeGenHLSL/builtins/exp.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/exp.hlsl
@@ -5,48 +5,48 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z13test_exp_half
-// NATIVE_HALF: %elt.exp = call half @llvm.exp.f16(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_exp_half
+// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16(
 // NATIVE_HALF: ret half %elt.exp
-// NO_HALF-LABEL: define noundef float @_Z13test_exp_half
-// NO_HALF: %elt.exp = call float @llvm.exp.f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_exp_half
+// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32(
 // NO_HALF: ret float %elt.exp
 half test_exp_half(half p0) { return exp(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_exp_half2
-// NATIVE_HALF: %elt.exp = call <2 x half> @llvm.exp.v2f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_exp_half2
+// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.exp.v2f16
 // NATIVE_HALF: ret <2 x half> %elt.exp
-// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_exp_half2
-// NO_HALF: %elt.exp = call <2 x float> @llvm.exp.v2f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_exp_half2
+// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32(
 // NO_HALF: ret <2 x float> %elt.exp
 half2 test_exp_half2(half2 p0) { return exp(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_exp_half3
-// NATIVE_HALF: %elt.exp = call <3 x half> @llvm.exp.v3f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_exp_half3
+// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.exp.v3f16
 // NATIVE_HALF: ret <3 x half> %elt.exp
-// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_exp_half3
-// NO_HALF: %elt.exp = call <3 x float> @llvm.exp.v3f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_exp_half3
+// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32(
 // NO_HALF: ret <3 x float> %elt.exp
 half3 test_exp_half3(half3 p0) { return exp(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_exp_half4
-// NATIVE_HALF: %elt.exp = call <4 x half> @llvm.exp.v4f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_exp_half4
+// NATIVE_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.exp.v4f16
 // NATIVE_HALF: ret <4 x half> %elt.exp
-// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_exp_half4
-// NO_HALF: %elt.exp = call <4 x float> @llvm.exp.v4f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_exp_half4
+// NO_HALF: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32(
 // NO_HALF: ret <4 x float> %elt.exp
 half4 test_exp_half4(half4 p0) { return exp(p0); }
 
-// CHECK-LABEL: define noundef float @_Z14test_exp_float
-// CHECK: %elt.exp = call float @llvm.exp.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_exp_float
+// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32(
 // CHECK: ret float %elt.exp
 float test_exp_float(float p0) { return exp(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z15test_exp_float2
-// CHECK: %elt.exp = call <2 x float> @llvm.exp.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_exp_float2
+// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp.v2f32
 // CHECK: ret <2 x float> %elt.exp
 float2 test_exp_float2(float2 p0) { return exp(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z15test_exp_float3
-// CHECK: %elt.exp = call <3 x float> @llvm.exp.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_exp_float3
+// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp.v3f32
 // CHECK: ret <3 x float> %elt.exp
 float3 test_exp_float3(float3 p0) { return exp(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z15test_exp_float4
-// CHECK: %elt.exp = call <4 x float> @llvm.exp.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_exp_float4
+// CHECK: %elt.exp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp.v4f32
 // CHECK: ret <4 x float> %elt.exp
 float4 test_exp_float4(float4 p0) { return exp(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/exp2.hlsl b/clang/test/CodeGenHLSL/builtins/exp2.hlsl
index 7bfc897beee16..b067427e46368 100644
--- a/clang/test/CodeGenHLSL/builtins/exp2.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/exp2.hlsl
@@ -5,48 +5,48 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z14test_exp2_half
-// NATIVE_HALF: %elt.exp2 = call half @llvm.exp2.f16(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z14test_exp2_half
+// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn half @llvm.exp2.f16(
 // NATIVE_HALF: ret half %elt.exp2
-// NO_HALF-LABEL: define noundef float @_Z14test_exp2_half
-// NO_HALF: %elt.exp2 = call float @llvm.exp2.f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z14test_exp2_half
+// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32(
 // NO_HALF: ret float %elt.exp2
 half test_exp2_half(half p0) { return exp2(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z15test_exp2_half2
-// NATIVE_HALF: %elt.exp2 = call <2 x half> @llvm.exp2.v2f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z15test_exp2_half2
+// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.exp2.v2f16
 // NATIVE_HALF: ret <2 x half> %elt.exp2
-// NO_HALF-LABEL: define noundef <2 x float> @_Z15test_exp2_half2
-// NO_HALF: %elt.exp2 = call <2 x float> @llvm.exp2.v2f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_exp2_half2
+// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32(
 // NO_HALF: ret <2 x float> %elt.exp2
 half2 test_exp2_half2(half2 p0) { return exp2(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z15test_exp2_half3
-// NATIVE_HALF: %elt.exp2 = call <3 x half> @llvm.exp2.v3f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z15test_exp2_half3
+// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.exp2.v3f16
 // NATIVE_HALF: ret <3 x half> %elt.exp2
-// NO_HALF-LABEL: define noundef <3 x float> @_Z15test_exp2_half3
-// NO_HALF: %elt.exp2 = call <3 x float> @llvm.exp2.v3f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_exp2_half3
+// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32(
 // NO_HALF: ret <3 x float> %elt.exp2
 half3 test_exp2_half3(half3 p0) { return exp2(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z15test_exp2_half4
-// NATIVE_HALF: %elt.exp2 = call <4 x half> @llvm.exp2.v4f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z15test_exp2_half4
+// NATIVE_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.exp2.v4f16
 // NATIVE_HALF: ret <4 x half> %elt.exp2
-// NO_HALF-LABEL: define noundef <4 x float> @_Z15test_exp2_half4
-// NO_HALF: %elt.exp2 = call <4 x float> @llvm.exp2.v4f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_exp2_half4
+// NO_HALF: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32(
 // NO_HALF: ret <4 x float> %elt.exp2
 half4 test_exp2_half4(half4 p0) { return exp2(p0); }
 
-// CHECK-LABEL: define noundef float @_Z15test_exp2_float
-// CHECK: %elt.exp2 = call float @llvm.exp2.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z15test_exp2_float
+// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn float @llvm.exp2.f32(
 // CHECK: ret float %elt.exp2
 float test_exp2_float(float p0) { return exp2(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z16test_exp2_float2
-// CHECK: %elt.exp2 = call <2 x float> @llvm.exp2.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_exp2_float2
+// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.exp2.v2f32
 // CHECK: ret <2 x float> %elt.exp2
 float2 test_exp2_float2(float2 p0) { return exp2(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z16test_exp2_float3
-// CHECK: %elt.exp2 = call <3 x float> @llvm.exp2.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_exp2_float3
+// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.exp2.v3f32
 // CHECK: ret <3 x float> %elt.exp2
 float3 test_exp2_float3(float3 p0) { return exp2(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z16test_exp2_float4
-// CHECK: %elt.exp2 = call <4 x float> @llvm.exp2.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_exp2_float4
+// CHECK: %elt.exp2 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.exp2.v4f32
 // CHECK: ret <4 x float> %elt.exp2
 float4 test_exp2_float4(float4 p0) { return exp2(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/floor.hlsl b/clang/test/CodeGenHLSL/builtins/floor.hlsl
index c2d6f1bcc335c..f610baeeefd48 100644
--- a/clang/test/CodeGenHLSL/builtins/floor.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/floor.hlsl
@@ -7,36 +7,36 @@
 
 using hlsl::floor;
 
-// NATIVE_HALF-LABEL: define noundef half @_Z15test_floor_half
-// NATIVE_HALF: call half @llvm.floor.f16(
-// NO_HALF-LABEL: define noundef float @_Z15test_floor_half
-// NO_HALF: call float @llvm.floor.f32(float %0)
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z15test_floor_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.floor.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z15test_floor_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.floor.f32(float %0)
 half test_floor_half(half p0) { return floor(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z16test_floor_half2
-// NATIVE_HALF: call <2 x half> @llvm.floor.v2f16(
-// NO_HALF-LABEL: define noundef <2 x float> @_Z16test_floor_half2
-// NO_HALF: call <2 x float> @llvm.floor.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z16test_floor_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.floor.v2f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_floor_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.floor.v2f32(
 half2 test_floor_half2(half2 p0) { return floor(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z16test_floor_half3
-// NATIVE_HALF: call <3 x half> @llvm.floor.v3f16(
-// NO_HALF-LABEL: define noundef <3 x float> @_Z16test_floor_half3
-// NO_HALF: call <3 x float> @llvm.floor.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z16test_floor_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.floor.v3f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_floor_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.floor.v3f32(
 half3 test_floor_half3(half3 p0) { return floor(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z16test_floor_half4
-// NATIVE_HALF: call <4 x half> @llvm.floor.v4f16(
-// NO_HALF-LABEL: define noundef <4 x float> @_Z16test_floor_half4
-// NO_HALF: call <4 x float> @llvm.floor.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z16test_floor_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.floor.v4f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_floor_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.floor.v4f32(
 half4 test_floor_half4(half4 p0) { return floor(p0); }
 
-// CHECK-LABEL: define noundef float @_Z16test_floor_float
-// CHECK: call float @llvm.floor.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z16test_floor_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.floor.f32(
 float test_floor_float(float p0) { return floor(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z17test_floor_float2
-// CHECK: call <2 x float> @llvm.floor.v2f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z17test_floor_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.floor.v2f32(
 float2 test_floor_float2(float2 p0) { return floor(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z17test_floor_float3
-// CHECK: call <3 x float> @llvm.floor.v3f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z17test_floor_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.floor.v3f32(
 float3 test_floor_float3(float3 p0) { return floor(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z17test_floor_float4
-// CHECK: call <4 x float> @llvm.floor.v4f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z17test_floor_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.floor.v4f32(
 float4 test_floor_float4(float4 p0) { return floor(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/fmod.hlsl b/clang/test/CodeGenHLSL/builtins/fmod.hlsl
index 708779daaa7b6..b62967114d456 100644
--- a/clang/test/CodeGenHLSL/builtins/fmod.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/fmod.hlsl
@@ -5,7 +5,7 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   -DFNATTRS=noundef -DTYPE=half
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTYPE=half
 
 //
 // ---------- No Native Half support test -----------
@@ -13,7 +13,7 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s \
-// RUN:   -DFNATTRS=noundef -DTYPE=float
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTYPE=float
 
 
 // Spirv target:
@@ -23,7 +23,7 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   -DFNATTRS="spir_func noundef" -DTYPE=half
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half
 
 //
 // ---------- No Native Half support test -----------
@@ -31,47 +31,47 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s \
-// RUN:   -DFNATTRS="spir_func noundef" -DTYPE=float
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=float
 
 
 
 // CHECK: define [[FNATTRS]] [[TYPE]] @
-// CHECK: %fmod = frem [[TYPE]]
+// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn [[TYPE]]
 // CHECK: ret [[TYPE]] %fmod
 half test_fmod_half(half p0, half p1) { return fmod(p0, p1); }
 
 // CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @
-// CHECK: %fmod = frem <2 x [[TYPE]]>
+// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]>
 // CHECK: ret <2 x [[TYPE]]> %fmod
 half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); }
 
 // CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @
-// CHECK: %fmod = frem <3 x [[TYPE]]>
+// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]>
 // CHECK: ret <3 x [[TYPE]]> %fmod
 half3 test_fmod_half3(half3 p0, half3 p1) { return fmod(p0, p1); }
 
 // CHECK: define [[FNATTRS]] <4 x [[TYPE]]> @
-// CHECK: %fmod = frem <4 x [[TYPE]]>
+// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]>
 // CHECK: ret <4 x [[TYPE]]> %fmod
 half4 test_fmod_half4(half4 p0, half4 p1) { return fmod(p0, p1); }
 
 // CHECK: define [[FNATTRS]] float @
-// CHECK: %fmod = frem float
+// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn float
 // CHECK: ret float %fmod
 float test_fmod_float(float p0, float p1) { return fmod(p0, p1); }
 
 // CHECK: define [[FNATTRS]] <2 x float> @
-// CHECK: %fmod = frem <2 x float>
+// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x float>
 // CHECK: ret <2 x float> %fmod
 float2 test_fmod_float2(float2 p0, float2 p1) { return fmod(p0, p1); }
 
 // CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %fmod = frem <3 x float>
+// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x float>
 // CHECK: ret <3 x float> %fmod
 float3 test_fmod_float3(float3 p0, float3 p1) { return fmod(p0, p1); }
 
 // CHECK: define [[FNATTRS]] <4 x float> @
-// CHECK: %fmod = frem <4 x float>
+// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x float>
 // CHECK: ret <4 x float> %fmod
 float4 test_fmod_float4(float4 p0, float4 p1) { return fmod(p0, p1); }
 
diff --git a/clang/test/CodeGenHLSL/builtins/frac.hlsl b/clang/test/CodeGenHLSL/builtins/frac.hlsl
index f0fbba978c023..7b105ce84359f 100644
--- a/clang/test/CodeGenHLSL/builtins/frac.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/frac.hlsl
@@ -2,63 +2,63 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 
 // NATIVE_HALF: define [[FNATTRS]] half @
-// NATIVE_HALF: %hlsl.frac = call half @llvm.[[TARGET]].frac.f16(
+// NATIVE_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].frac.f16(
 // NATIVE_HALF: ret half %hlsl.frac
 // NO_HALF: define [[FNATTRS]] float @
-// NO_HALF: %hlsl.frac = call float @llvm.[[TARGET]].frac.f32(
+// NO_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].frac.f32(
 // NO_HALF: ret float %hlsl.frac
 half test_frac_half(half p0) { return frac(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <2 x half> @
-// NATIVE_HALF: %hlsl.frac = call <2 x half> @llvm.[[TARGET]].frac.v2f16
+// NATIVE_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].frac.v2f16
 // NATIVE_HALF: ret <2 x half> %hlsl.frac
 // NO_HALF: define [[FNATTRS]] <2 x float> @
-// NO_HALF: %hlsl.frac = call <2 x float> @llvm.[[TARGET]].frac.v2f32(
+// NO_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].frac.v2f32(
 // NO_HALF: ret <2 x float> %hlsl.frac
 half2 test_frac_half2(half2 p0) { return frac(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <3 x half> @
-// NATIVE_HALF: %hlsl.frac = call <3 x half> @llvm.[[TARGET]].frac.v3f16
+// NATIVE_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].frac.v3f16
 // NATIVE_HALF: ret <3 x half> %hlsl.frac
 // NO_HALF: define [[FNATTRS]] <3 x float> @
-// NO_HALF: %hlsl.frac = call <3 x float> @llvm.[[TARGET]].frac.v3f32(
+// NO_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].frac.v3f32(
 // NO_HALF: ret <3 x float> %hlsl.frac
 half3 test_frac_half3(half3 p0) { return frac(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <4 x half> @
-// NATIVE_HALF: %hlsl.frac = call <4 x half> @llvm.[[TARGET]].frac.v4f16
+// NATIVE_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].frac.v4f16
 // NATIVE_HALF: ret <4 x half> %hlsl.frac
 // NO_HALF: define [[FNATTRS]] <4 x float> @
-// NO_HALF: %hlsl.frac = call <4 x float> @llvm.[[TARGET]].frac.v4f32(
+// NO_HALF: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].frac.v4f32(
 // NO_HALF: ret <4 x float> %hlsl.frac
 half4 test_frac_half4(half4 p0) { return frac(p0); }
 
 // CHECK: define [[FNATTRS]] float @
-// CHECK: %hlsl.frac = call float @llvm.[[TARGET]].frac.f32(
+// CHECK: %hlsl.frac = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].frac.f32(
 // CHECK: ret float %hlsl.frac
 float test_frac_float(float p0) { return frac(p0); }
 // CHECK: define [[FNATTRS]] <2 x float> @
-// CHECK: %hlsl.frac = call <2 x float> @llvm.[[TARGET]].frac.v2f32
+// CHECK: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].frac.v2f32
 // CHECK: ret <2 x float> %hlsl.frac
 float2 test_frac_float2(float2 p0) { return frac(p0); }
 // CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %hlsl.frac = call <3 x float> @llvm.[[TARGET]].frac.v3f32
+// CHECK: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].frac.v3f32
 // CHECK: ret <3 x float> %hlsl.frac
 float3 test_frac_float3(float3 p0) { return frac(p0); }
 // CHECK: define [[FNATTRS]] <4 x float> @
-// CHECK: %hlsl.frac = call <4 x float> @llvm.[[TARGET]].frac.v4f32
+// CHECK: %hlsl.frac = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].frac.v4f32
 // CHECK: ret <4 x float> %hlsl.frac
 float4 test_frac_float4(float4 p0) { return frac(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index 1c23b0df04df9..a24f01d275440 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -6,36 +6,36 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: call half @llvm.fabs.f16(half
-// NO_HALF: call float @llvm.fabs.f32(float
+// NATIVE_HALF: define noundef nofpclass(nan inf) half @
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.fabs.f16(half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float
 // NATIVE_HALF: ret half
 // NO_HALF: ret float
 half test_length_half(half p0)
 {
   return length(p0);
 }
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32(
+// NATIVE_HALF: define noundef nofpclass(nan inf) half @
+// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v2f16
+// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32(
 // NATIVE_HALF: ret half %hlsl.length
 // NO_HALF: ret float %hlsl.length
 half test_length_half2(half2 p0)
 {
   return length(p0);
 }
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32(
+// NATIVE_HALF: define noundef nofpclass(nan inf) half @
+// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v3f16
+// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32(
 // NATIVE_HALF: ret half %hlsl.length
 // NO_HALF: ret float %hlsl.length
 half test_length_half3(half3 p0)
 {
   return length(p0);
 }
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32(
+// NATIVE_HALF: define noundef nofpclass(nan inf) half @
+// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v4f16
+// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32(
 // NATIVE_HALF: ret half %hlsl.length
 // NO_HALF: ret float %hlsl.length
 half test_length_half4(half4 p0)
@@ -43,29 +43,29 @@ half test_length_half4(half4 p0)
   return length(p0);
 }
 
-// CHECK: define noundef float @
-// CHECK: call float @llvm.fabs.f32(float
+// CHECK: define noundef nofpclass(nan inf) float @
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float
 // CHECK: ret float
 float test_length_float(float p0)
 {
   return length(p0);
 }
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32(
+// CHECK: define noundef nofpclass(nan inf) float @
+// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32(
 // CHECK: ret float %hlsl.length
 float test_length_float2(float2 p0)
 {
   return length(p0);
 }
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32(
+// CHECK: define noundef nofpclass(nan inf) float @
+// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32(
 // CHECK: ret float %hlsl.length
 float test_length_float3(float3 p0)
 {
   return length(p0);
 }
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32(
+// CHECK: define noundef nofpclass(nan inf) float @
+// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32(
 // CHECK: ret float %hlsl.length
 float test_length_float4(float4 p0)
 {
diff --git a/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl
index f9b3cbcddfb69..c98693f32c834 100644
--- a/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl
@@ -1,14 +1,14 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
 // CHECK-LABEL: builtin_lerp_half_vector
-// CHECK: %hlsl.lerp = call <3 x half> @llvm.dx.lerp.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2)
+// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.dx.lerp.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2)
 // CHECK: ret <3 x half> %hlsl.lerp
 half3 builtin_lerp_half_vector (half3 p0) {
   return __builtin_hlsl_lerp ( p0, p0, p0 );
 }
 
 // CHECK-LABEL: builtin_lerp_floar_vector
-// CHECK: %hlsl.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
+// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2)
 // CHECK: ret <2 x float> %hlsl.lerp
 float2 builtin_lerp_floar_vector ( float2 p0) {
   return __builtin_hlsl_lerp ( p0, p0, p0 );
diff --git a/clang/test/CodeGenHLSL/builtins/lerp.hlsl b/clang/test/CodeGenHLSL/builtins/lerp.hlsl
index b11046894bd88..d7a7113de4878 100644
--- a/clang/test/CodeGenHLSL/builtins/lerp.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/lerp.hlsl
@@ -2,57 +2,57 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 
-// NATIVE_HALF: %hlsl.lerp = call half @llvm.[[TARGET]].lerp.f16(half %{{.*}}, half %{{.*}}, half %{{.*}})
+// NATIVE_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].lerp.f16(half %{{.*}}, half %{{.*}}, half %{{.*}})
 // NATIVE_HALF: ret half %hlsl.lerp
-// NO_HALF: %hlsl.lerp = call float @llvm.[[TARGET]].lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
+// NO_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
 // NO_HALF: ret float %hlsl.lerp
 half test_lerp_half(half p0) { return lerp(p0, p0, p0); }
 
-// NATIVE_HALF: %hlsl.lerp = call <2 x half> @llvm.[[TARGET]].lerp.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}})
+// NATIVE_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].lerp.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}})
 // NATIVE_HALF: ret <2 x half> %hlsl.lerp
-// NO_HALF: %hlsl.lerp = call <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}})
+// NO_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}})
 // NO_HALF: ret <2 x float> %hlsl.lerp
 half2 test_lerp_half2(half2 p0) { return lerp(p0, p0, p0); }
 
-// NATIVE_HALF: %hlsl.lerp = call <3 x half> @llvm.[[TARGET]].lerp.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}}, <3 x half> %{{.*}})
+// NATIVE_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].lerp.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}}, <3 x half> %{{.*}})
 // NATIVE_HALF: ret <3 x half> %hlsl.lerp
-// NO_HALF: %hlsl.lerp = call <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}})
+// NO_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}})
 // NO_HALF: ret <3 x float> %hlsl.lerp
 half3 test_lerp_half3(half3 p0) { return lerp(p0, p0, p0); }
 
-// NATIVE_HALF: %hlsl.lerp = call <4 x half> @llvm.[[TARGET]].lerp.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x half> %{{.*}})
+// NATIVE_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].lerp.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x half> %{{.*}})
 // NATIVE_HALF: ret <4 x half> %hlsl.lerp
-// NO_HALF: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+// NO_HALF: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
 // NO_HALF: ret <4 x float> %hlsl.lerp
 half4 test_lerp_half4(half4 p0) { return lerp(p0, p0, p0); }
 
-// CHECK: %hlsl.lerp = call float @llvm.[[TARGET]].lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
+// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].lerp.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
 // CHECK: ret float %hlsl.lerp
 float test_lerp_float(float p0) { return lerp(p0, p0, p0); }
 
-// CHECK: %hlsl.lerp = call <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}})
+// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}, <2 x float> %{{.*}})
 // CHECK: ret <2 x float> %hlsl.lerp
 float2 test_lerp_float2(float2 p0) { return lerp(p0, p0, p0); }
 
-// CHECK: %hlsl.lerp = call <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}})
+// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}, <3 x float> %{{.*}})
 // CHECK: ret <3 x float> %hlsl.lerp
 float3 test_lerp_float3(float3 p0) { return lerp(p0, p0, p0); }
 
-// CHECK: %hlsl.lerp = call <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+// CHECK: %hlsl.lerp = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
 // CHECK: ret <4 x float> %hlsl.lerp
 float4 test_lerp_float4(float4 p0) { return lerp(p0, p0, p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/log.hlsl b/clang/test/CodeGenHLSL/builtins/log.hlsl
index 71ce502eb8c4a..e489939594a53 100644
--- a/clang/test/CodeGenHLSL/builtins/log.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/log.hlsl
@@ -5,36 +5,36 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z13test_log_half
-// NATIVE_HALF: call half @llvm.log.f16(
-// NO_HALF-LABEL: define noundef float @_Z13test_log_half
-// NO_HALF: call float @llvm.log.f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_log_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.log.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_log_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.log.f32(
 half test_log_half(half p0) { return log(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_log_half2
-// NATIVE_HALF: call <2 x half> @llvm.log.v2f16
-// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_log_half2
-// NO_HALF: call <2 x float> @llvm.log.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_log_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.log.v2f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_log_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log.v2f32(
 half2 test_log_half2(half2 p0) { return log(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_log_half3
-// NATIVE_HALF: call <3 x half> @llvm.log.v3f16
-// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_log_half3
-// NO_HALF: call <3 x float> @llvm.log.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_log_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.log.v3f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_log_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log.v3f32(
 half3 test_log_half3(half3 p0) { return log(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_log_half4
-// NATIVE_HALF: call <4 x half> @llvm.log.v4f16
-// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_log_half4
-// NO_HALF: call <4 x float> @llvm.log.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_log_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.log.v4f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_log_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log.v4f32(
 half4 test_log_half4(half4 p0) { return log(p0); }
 
-// CHECK-LABEL: define noundef float @_Z14test_log_float
-// CHECK: call float @llvm.log.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_log_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.log.f32(
 float test_log_float(float p0) { return log(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z15test_log_float2
-// CHECK: call <2 x float> @llvm.log.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_log_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log.v2f32
 float2 test_log_float2(float2 p0) { return log(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z15test_log_float3
-// CHECK: call <3 x float> @llvm.log.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_log_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log.v3f32
 float3 test_log_float3(float3 p0) { return log(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z15test_log_float4
-// CHECK: call <4 x float> @llvm.log.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_log_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log.v4f32
 float4 test_log_float4(float4 p0) { return log(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/log10.hlsl b/clang/test/CodeGenHLSL/builtins/log10.hlsl
index e15b6f5747b0a..37c8e837c45a3 100644
--- a/clang/test/CodeGenHLSL/builtins/log10.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/log10.hlsl
@@ -5,36 +5,36 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z15test_log10_half
-// NATIVE_HALF: call half @llvm.log10.f16(
-// NO_HALF-LABEL: define noundef float @_Z15test_log10_half
-// NO_HALF: call float @llvm.log10.f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z15test_log10_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.log10.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z15test_log10_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.log10.f32(
 half test_log10_half(half p0) { return log10(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z16test_log10_half2
-// NATIVE_HALF: call <2 x half> @llvm.log10.v2f16
-// NO_HALF-LABEL: define noundef <2 x float> @_Z16test_log10_half2
-// NO_HALF: call <2 x float> @llvm.log10.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z16test_log10_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.log10.v2f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_log10_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log10.v2f32(
 half2 test_log10_half2(half2 p0) { return log10(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z16test_log10_half3
-// NATIVE_HALF: call <3 x half> @llvm.log10.v3f16
-// NO_HALF-LABEL: define noundef <3 x float> @_Z16test_log10_half3
-// NO_HALF: call <3 x float> @llvm.log10.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z16test_log10_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.log10.v3f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_log10_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log10.v3f32(
 half3 test_log10_half3(half3 p0) { return log10(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z16test_log10_half4
-// NATIVE_HALF: call <4 x half> @llvm.log10.v4f16
-// NO_HALF-LABEL: define noundef <4 x float> @_Z16test_log10_half4
-// NO_HALF: call <4 x float> @llvm.log10.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z16test_log10_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.log10.v4f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_log10_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log10.v4f32(
 half4 test_log10_half4(half4 p0) { return log10(p0); }
 
-// CHECK-LABEL: define noundef float @_Z16test_log10_float
-// CHECK: call float @llvm.log10.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z16test_log10_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.log10.f32(
 float test_log10_float(float p0) { return log10(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z17test_log10_float2
-// CHECK: call <2 x float> @llvm.log10.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z17test_log10_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log10.v2f32
 float2 test_log10_float2(float2 p0) { return log10(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z17test_log10_float3
-// CHECK: call <3 x float> @llvm.log10.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z17test_log10_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log10.v3f32
 float3 test_log10_float3(float3 p0) { return log10(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z17test_log10_float4
-// CHECK: call <4 x float> @llvm.log10.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z17test_log10_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log10.v4f32
 float4 test_log10_float4(float4 p0) { return log10(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/log2.hlsl b/clang/test/CodeGenHLSL/builtins/log2.hlsl
index 575761a5f637c..5159d5bb0fa4e 100644
--- a/clang/test/CodeGenHLSL/builtins/log2.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/log2.hlsl
@@ -5,36 +5,36 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z14test_log2_half
-// NATIVE_HALF: call half @llvm.log2.f16(
-// NO_HALF-LABEL: define noundef float @_Z14test_log2_half
-// NO_HALF: call float @llvm.log2.f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z14test_log2_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.log2.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z14test_log2_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.log2.f32(
 half test_log2_half(half p0) { return log2(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z15test_log2_half2
-// NATIVE_HALF: call <2 x half> @llvm.log2.v2f16
-// NO_HALF-LABEL: define noundef <2 x float> @_Z15test_log2_half2
-// NO_HALF: call <2 x float> @llvm.log2.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z15test_log2_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.log2.v2f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_log2_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log2.v2f32(
 half2 test_log2_half2(half2 p0) { return log2(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z15test_log2_half3
-// NATIVE_HALF: call <3 x half> @llvm.log2.v3f16
-// NO_HALF-LABEL: define noundef <3 x float> @_Z15test_log2_half3
-// NO_HALF: call <3 x float> @llvm.log2.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z15test_log2_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.log2.v3f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_log2_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log2.v3f32(
 half3 test_log2_half3(half3 p0) { return log2(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z15test_log2_half4
-// NATIVE_HALF: call <4 x half> @llvm.log2.v4f16
-// NO_HALF-LABEL: define noundef <4 x float> @_Z15test_log2_half4
-// NO_HALF: call <4 x float> @llvm.log2.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z15test_log2_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.log2.v4f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_log2_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log2.v4f32(
 half4 test_log2_half4(half4 p0) { return log2(p0); }
 
-// CHECK-LABEL: define noundef float @_Z15test_log2_float
-// CHECK: call float @llvm.log2.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z15test_log2_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.log2.f32(
 float test_log2_float(float p0) { return log2(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z16test_log2_float2
-// CHECK: call <2 x float> @llvm.log2.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_log2_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.log2.v2f32
 float2 test_log2_float2(float2 p0) { return log2(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z16test_log2_float3
-// CHECK: call <3 x float> @llvm.log2.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_log2_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.log2.v3f32
 float3 test_log2_float3(float3 p0) { return log2(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z16test_log2_float4
-// CHECK: call <4 x float> @llvm.log2.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_log2_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.log2.v4f32
 float4 test_log2_float4(float4 p0) { return log2(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/mad.hlsl b/clang/test/CodeGenHLSL/builtins/mad.hlsl
index 265a2552c80fb..e764e20748d58 100644
--- a/clang/test/CodeGenHLSL/builtins/mad.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/mad.hlsl
@@ -67,104 +67,104 @@ int16_t4 test_mad_int16_t4(int16_t4 p0, int16_t4 p1, int16_t4 p2) { return mad(p
 // NATIVE_HALF: %[[p0:.*]] = load half, ptr %p0.addr, align 2
 // NATIVE_HALF: %[[p1:.*]] = load half, ptr %p1.addr, align 2
 // NATIVE_HALF: %[[p2:.*]] = load half, ptr %p2.addr, align 2
-// NATIVE_HALF: %hlsl.fmad = call half @llvm.fmuladd.f16(half %[[p0]], half %[[p1]], half %[[p2]])
+// NATIVE_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn half @llvm.fmuladd.f16(half %[[p0]], half %[[p1]], half %[[p2]])
 // NATIVE_HALF: ret half %hlsl.fmad
 // NO_HALF: %[[p0:.*]] = load float, ptr %p0.addr, align 4
 // NO_HALF: %[[p1:.*]] = load float, ptr %p1.addr, align 4
 // NO_HALF: %[[p2:.*]] = load float, ptr %p2.addr, align 4
-// NO_HALF: %hlsl.fmad = call float @llvm.fmuladd.f32(float %[[p0]], float %[[p1]], float %[[p2]])
+// NO_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn float @llvm.fmuladd.f32(float %[[p0]], float %[[p1]], float %[[p2]])
 // NO_HALF: ret float %hlsl.fmad
 half test_mad_half(half p0, half p1, half p2) { return mad(p0, p1, p2); }
 
 // NATIVE_HALF: %[[p0:.*]] = load <2 x half>, ptr %p0.addr, align 4
 // NATIVE_HALF: %[[p1:.*]] = load <2 x half>, ptr %p1.addr, align 4
 // NATIVE_HALF: %[[p2:.*]] = load <2 x half>, ptr %p2.addr, align 4
-// NATIVE_HALF: %hlsl.fmad = call <2 x half>  @llvm.fmuladd.v2f16(<2 x half> %[[p0]], <2 x half> %[[p1]], <2 x half> %[[p2]])
+// NATIVE_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <2 x half>  @llvm.fmuladd.v2f16(<2 x half> %[[p0]], <2 x half> %[[p1]], <2 x half> %[[p2]])
 // NATIVE_HALF: ret <2 x half> %hlsl.fmad
 // NO_HALF: %[[p0:.*]] = load <2 x float>, ptr %p0.addr, align 8
 // NO_HALF: %[[p1:.*]] = load <2 x float>, ptr %p1.addr, align 8
 // NO_HALF: %[[p2:.*]] = load <2 x float>, ptr %p2.addr, align 8
-// NO_HALF: %hlsl.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %[[p0]], <2 x float> %[[p1]], <2 x float> %[[p2]])
+// NO_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %[[p0]], <2 x float> %[[p1]], <2 x float> %[[p2]])
 // NO_HALF: ret <2 x float> %hlsl.fmad
 half2 test_mad_half2(half2 p0, half2 p1, half2 p2) { return mad(p0, p1, p2); }
 
 // NATIVE_HALF: %[[p0:.*]] = load <3 x half>, ptr %p0.addr, align 8
 // NATIVE_HALF: %[[p1:.*]] = load <3 x half>, ptr %p1.addr, align 8
 // NATIVE_HALF: %[[p2:.*]] = load <3 x half>, ptr %p2.addr, align 8
-// NATIVE_HALF: %hlsl.fmad = call <3 x half>  @llvm.fmuladd.v3f16(<3 x half> %[[p0]], <3 x half> %[[p1]], <3 x half> %[[p2]])
+// NATIVE_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <3 x half>  @llvm.fmuladd.v3f16(<3 x half> %[[p0]], <3 x half> %[[p1]], <3 x half> %[[p2]])
 // NATIVE_HALF: ret <3 x half> %hlsl.fmad
 // NO_HALF: %[[p0:.*]] = load <3 x float>, ptr %p0.addr, align 16
 // NO_HALF: %[[p1:.*]] = load <3 x float>, ptr %p1.addr, align 16
 // NO_HALF: %[[p2:.*]] = load <3 x float>, ptr %p2.addr, align 16
-// NO_HALF: %hlsl.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %[[p0]], <3 x float> %[[p1]], <3 x float> %[[p2]])
+// NO_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %[[p0]], <3 x float> %[[p1]], <3 x float> %[[p2]])
 // NO_HALF: ret <3 x float> %hlsl.fmad
 half3 test_mad_half3(half3 p0, half3 p1, half3 p2) { return mad(p0, p1, p2); }
 
 // NATIVE_HALF: %[[p0:.*]] = load <4 x half>, ptr %p0.addr, align 8
 // NATIVE_HALF: %[[p1:.*]] = load <4 x half>, ptr %p1.addr, align 8
 // NATIVE_HALF: %[[p2:.*]] = load <4 x half>, ptr %p2.addr, align 8
-// NATIVE_HALF: %hlsl.fmad = call <4 x half>  @llvm.fmuladd.v4f16(<4 x half> %[[p0]], <4 x half> %[[p1]], <4 x half> %[[p2]])
+// NATIVE_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <4 x half>  @llvm.fmuladd.v4f16(<4 x half> %[[p0]], <4 x half> %[[p1]], <4 x half> %[[p2]])
 // NATIVE_HALF: ret <4 x half> %hlsl.fmad
 // NO_HALF: %[[p0:.*]] = load <4 x float>, ptr %p0.addr, align 16
 // NO_HALF: %[[p1:.*]] = load <4 x float>, ptr %p1.addr, align 16
 // NO_HALF: %[[p2:.*]] = load <4 x float>, ptr %p2.addr, align 16
-// NO_HALF: %hlsl.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %[[p0]], <4 x float> %[[p1]], <4 x float> %[[p2]])
+// NO_HALF: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %[[p0]], <4 x float> %[[p1]], <4 x float> %[[p2]])
 // NO_HALF: ret <4 x float> %hlsl.fmad
 half4 test_mad_half4(half4 p0, half4 p1, half4 p2) { return mad(p0, p1, p2); }
 
 // CHECK: %[[p0:.*]] = load float, ptr %p0.addr, align 4
 // CHECK: %[[p1:.*]] = load float, ptr %p1.addr, align 4
 // CHECK: %[[p2:.*]] = load float, ptr %p2.addr, align 4
-// CHECK: %hlsl.fmad = call float @llvm.fmuladd.f32(float %[[p0]], float %[[p1]], float %[[p2]])
+// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn float @llvm.fmuladd.f32(float %[[p0]], float %[[p1]], float %[[p2]])
 // CHECK: ret float %hlsl.fmad
 float test_mad_float(float p0, float p1, float p2) { return mad(p0, p1, p2); }
 
 // CHECK: %[[p0:.*]] = load <2 x float>, ptr %p0.addr, align 8
 // CHECK: %[[p1:.*]] = load <2 x float>, ptr %p1.addr, align 8
 // CHECK: %[[p2:.*]] = load <2 x float>, ptr %p2.addr, align 8
-// CHECK: %hlsl.fmad = call <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %[[p0]], <2 x float> %[[p1]], <2 x float> %[[p2]])
+// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <2 x float>  @llvm.fmuladd.v2f32(<2 x float> %[[p0]], <2 x float> %[[p1]], <2 x float> %[[p2]])
 // CHECK: ret <2 x float> %hlsl.fmad
 float2 test_mad_float2(float2 p0, float2 p1, float2 p2) { return mad(p0, p1, p2); }
 
 // CHECK: %[[p0:.*]] = load <3 x float>, ptr %p0.addr, align 16
 // CHECK: %[[p1:.*]] = load <3 x float>, ptr %p1.addr, align 16
 // CHECK: %[[p2:.*]] = load <3 x float>, ptr %p2.addr, align 16
-// CHECK: %hlsl.fmad = call <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %[[p0]], <3 x float> %[[p1]], <3 x float> %[[p2]])
+// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <3 x float>  @llvm.fmuladd.v3f32(<3 x float> %[[p0]], <3 x float> %[[p1]], <3 x float> %[[p2]])
 // CHECK: ret <3 x float> %hlsl.fmad
 float3 test_mad_float3(float3 p0, float3 p1, float3 p2) { return mad(p0, p1, p2); }
 
 // CHECK: %[[p0:.*]] = load <4 x float>, ptr %p0.addr, align 16
 // CHECK: %[[p1:.*]] = load <4 x float>, ptr %p1.addr, align 16
 // CHECK: %[[p2:.*]] = load <4 x float>, ptr %p2.addr, align 16
-// CHECK: %hlsl.fmad = call <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %[[p0]], <4 x float> %[[p1]], <4 x float> %[[p2]])
+// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <4 x float>  @llvm.fmuladd.v4f32(<4 x float> %[[p0]], <4 x float> %[[p1]], <4 x float> %[[p2]])
 // CHECK: ret <4 x float> %hlsl.fmad
 float4 test_mad_float4(float4 p0, float4 p1, float4 p2) { return mad(p0, p1, p2); }
 
 // CHECK: %[[p0:.*]] = load double, ptr %p0.addr, align 8
 // CHECK: %[[p1:.*]] = load double, ptr %p1.addr, align 8
 // CHECK: %[[p2:.*]] = load double, ptr %p2.addr, align 8
-// CHECK: %hlsl.fmad = call double @llvm.fmuladd.f64(double %[[p0]], double %[[p1]], double %[[p2]])
+// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn double @llvm.fmuladd.f64(double %[[p0]], double %[[p1]], double %[[p2]])
 // CHECK: ret double %hlsl.fmad
 double test_mad_double(double p0, double p1, double p2) { return mad(p0, p1, p2); }
 
 // CHECK: %[[p0:.*]] = load <2 x double>, ptr %p0.addr, align 16
 // CHECK: %[[p1:.*]] = load <2 x double>, ptr %p1.addr, align 16
 // CHECK: %[[p2:.*]] = load <2 x double>, ptr %p2.addr, align 16
-// CHECK: %hlsl.fmad = call <2 x double>  @llvm.fmuladd.v2f64(<2 x double> %[[p0]], <2 x double> %[[p1]], <2 x double> %[[p2]])
+// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <2 x double>  @llvm.fmuladd.v2f64(<2 x double> %[[p0]], <2 x double> %[[p1]], <2 x double> %[[p2]])
 // CHECK: ret <2 x double> %hlsl.fmad
 double2 test_mad_double2(double2 p0, double2 p1, double2 p2) { return mad(p0, p1, p2); }
 
 // CHECK: %[[p0:.*]] = load <3 x double>, ptr %p0.addr, align 32
 // CHECK: %[[p1:.*]] = load <3 x double>, ptr %p1.addr, align 32
 // CHECK: %[[p2:.*]] = load <3 x double>, ptr %p2.addr, align 32
-// CHECK: %hlsl.fmad = call <3 x double>  @llvm.fmuladd.v3f64(<3 x double> %[[p0]], <3 x double> %[[p1]], <3 x double> %[[p2]])
+// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <3 x double>  @llvm.fmuladd.v3f64(<3 x double> %[[p0]], <3 x double> %[[p1]], <3 x double> %[[p2]])
 // CHECK: ret <3 x double> %hlsl.fmad
 double3 test_mad_double3(double3 p0, double3 p1, double3 p2) { return mad(p0, p1, p2); }
 
 // CHECK: %[[p0:.*]] = load <4 x double>, ptr %p0.addr, align 32
 // CHECK: %[[p1:.*]] = load <4 x double>, ptr %p1.addr, align 32
 // CHECK: %[[p2:.*]] = load <4 x double>, ptr %p2.addr, align 32
-// CHECK: %hlsl.fmad = call <4 x double>  @llvm.fmuladd.v4f64(<4 x double> %[[p0]], <4 x double> %[[p1]], <4 x double> %[[p2]])
+// CHECK: %hlsl.fmad = call reassoc nnan ninf nsz arcp afn <4 x double>  @llvm.fmuladd.v4f64(<4 x double> %[[p0]], <4 x double> %[[p1]], <4 x double> %[[p2]])
 // CHECK: ret <4 x double> %hlsl.fmad
 double4 test_mad_double4(double4 p0, double4 p1, double4 p2) { return mad(p0, p1, p2); }
 
diff --git a/clang/test/CodeGenHLSL/builtins/max.hlsl b/clang/test/CodeGenHLSL/builtins/max.hlsl
index d462fda2ccb09..0b767335556ee 100644
--- a/clang/test/CodeGenHLSL/builtins/max.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/max.hlsl
@@ -85,49 +85,49 @@ uint64_t3 test_max_ulong3(uint64_t3 p0, uint64_t3 p1) { return max(p0, p1); }
 // CHECK: call <4 x i64> @llvm.umax.v4i64
 uint64_t4 test_max_ulong4(uint64_t4 p0, uint64_t4 p1) { return max(p0, p1); }
 
-// NATIVE_HALF-LABEL: define noundef half @_Z13test_max_half
-// NATIVE_HALF: call half @llvm.maxnum.f16(
-// NO_HALF-LABEL: define noundef float @_Z13test_max_half
-// NO_HALF: call float @llvm.maxnum.f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_max_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.maxnum.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_max_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.maxnum.f32(
 half test_max_half(half p0, half p1) { return max(p0, p1); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_max_half2
-// NATIVE_HALF: call <2 x half> @llvm.maxnum.v2f16
-// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_max_half2
-// NO_HALF: call <2 x float> @llvm.maxnum.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_max_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.maxnum.v2f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_max_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.maxnum.v2f32(
 half2 test_max_half2(half2 p0, half2 p1) { return max(p0, p1); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_max_half3
-// NATIVE_HALF: call <3 x half> @llvm.maxnum.v3f16
-// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_max_half3
-// NO_HALF: call <3 x float> @llvm.maxnum.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_max_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.maxnum.v3f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_max_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.maxnum.v3f32(
 half3 test_max_half3(half3 p0, half3 p1) { return max(p0, p1); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_max_half4
-// NATIVE_HALF: call <4 x half> @llvm.maxnum.v4f16
-// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_max_half4
-// NO_HALF: call <4 x float> @llvm.maxnum.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_max_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.maxnum.v4f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_max_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.maxnum.v4f32(
 half4 test_max_half4(half4 p0, half4 p1) { return max(p0, p1); }
 
-// CHECK-LABEL: define noundef float @_Z14test_max_float
-// CHECK: call float @llvm.maxnum.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_max_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.maxnum.f32(
 float test_max_float(float p0, float p1) { return max(p0, p1); }
-// CHECK-LABEL: define noundef <2 x float> @_Z15test_max_float2
-// CHECK: call <2 x float> @llvm.maxnum.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_max_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.maxnum.v2f32
 float2 test_max_float2(float2 p0, float2 p1) { return max(p0, p1); }
-// CHECK-LABEL: define noundef <3 x float> @_Z15test_max_float3
-// CHECK: call <3 x float> @llvm.maxnum.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_max_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.maxnum.v3f32
 float3 test_max_float3(float3 p0, float3 p1) { return max(p0, p1); }
-// CHECK-LABEL: define noundef <4 x float> @_Z15test_max_float4
-// CHECK: call <4 x float> @llvm.maxnum.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_max_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.maxnum.v4f32
 float4 test_max_float4(float4 p0, float4 p1) { return max(p0, p1); }
 
-// CHECK-LABEL: define noundef double @_Z15test_max_double
-// CHECK: call double @llvm.maxnum.f64(
+// CHECK-LABEL: define noundef nofpclass(nan inf) double @_Z15test_max_double
+// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.maxnum.f64(
 double test_max_double(double p0, double p1) { return max(p0, p1); }
-// CHECK-LABEL: define noundef <2 x double> @_Z16test_max_double2
-// CHECK: call <2 x double> @llvm.maxnum.v2f64
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x double> @_Z16test_max_double2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.maxnum.v2f64
 double2 test_max_double2(double2 p0, double2 p1) { return max(p0, p1); }
-// CHECK-LABEL: define noundef <3 x double> @_Z16test_max_double3
-// CHECK: call <3 x double> @llvm.maxnum.v3f64
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x double> @_Z16test_max_double3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.maxnum.v3f64
 double3 test_max_double3(double3 p0, double3 p1) { return max(p0, p1); }
-// CHECK-LABEL: define noundef <4 x double> @_Z16test_max_double4
-// CHECK: call <4 x double> @llvm.maxnum.v4f64
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x double> @_Z16test_max_double4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.maxnum.v4f64
 double4 test_max_double4(double4 p0, double4 p1) { return max(p0, p1); }
diff --git a/clang/test/CodeGenHLSL/builtins/min.hlsl b/clang/test/CodeGenHLSL/builtins/min.hlsl
index 02d20d13f916d..a352c72f29542 100644
--- a/clang/test/CodeGenHLSL/builtins/min.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/min.hlsl
@@ -85,49 +85,49 @@ uint64_t3 test_min_ulong3(uint64_t3 p0, uint64_t3 p1) { return min(p0, p1); }
 // CHECK: call <4 x i64> @llvm.umin.v4i64
 uint64_t4 test_min_ulong4(uint64_t4 p0, uint64_t4 p1) { return min(p0, p1); }
 
-// NATIVE_HALF-LABEL: define noundef half @_Z13test_min_half
-// NATIVE_HALF: call half @llvm.minnum.f16(
-// NO_HALF-LABEL: define noundef float @_Z13test_min_half
-// NO_HALF: call float @llvm.minnum.f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_min_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.minnum.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_min_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.minnum.f32(
 half test_min_half(half p0, half p1) { return min(p0, p1); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_min_half2
-// NATIVE_HALF: call <2 x half> @llvm.minnum.v2f16
-// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_min_half2
-// NO_HALF: call <2 x float> @llvm.minnum.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_min_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.minnum.v2f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_min_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.minnum.v2f32(
 half2 test_min_half2(half2 p0, half2 p1) { return min(p0, p1); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_min_half3
-// NATIVE_HALF: call <3 x half> @llvm.minnum.v3f16
-// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_min_half3
-// NO_HALF: call <3 x float> @llvm.minnum.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_min_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.minnum.v3f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_min_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.minnum.v3f32(
 half3 test_min_half3(half3 p0, half3 p1) { return min(p0, p1); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_min_half4
-// NATIVE_HALF: call <4 x half> @llvm.minnum.v4f16
-// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_min_half4
-// NO_HALF: call <4 x float> @llvm.minnum.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_min_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.minnum.v4f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_min_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.minnum.v4f32(
 half4 test_min_half4(half4 p0, half4 p1) { return min(p0, p1); }
 
-// CHECK-LABEL: define noundef float @_Z14test_min_float
-// CHECK: call float @llvm.minnum.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_min_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.minnum.f32(
 float test_min_float(float p0, float p1) { return min(p0, p1); }
-// CHECK-LABEL: define noundef <2 x float> @_Z15test_min_float2
-// CHECK: call <2 x float> @llvm.minnum.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_min_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.minnum.v2f32
 float2 test_min_float2(float2 p0, float2 p1) { return min(p0, p1); }
-// CHECK-LABEL: define noundef <3 x float> @_Z15test_min_float3
-// CHECK: call <3 x float> @llvm.minnum.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_min_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.minnum.v3f32
 float3 test_min_float3(float3 p0, float3 p1) { return min(p0, p1); }
-// CHECK-LABEL: define noundef <4 x float> @_Z15test_min_float4
-// CHECK: call <4 x float> @llvm.minnum.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_min_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.minnum.v4f32
 float4 test_min_float4(float4 p0, float4 p1) { return min(p0, p1); }
 
-// CHECK-LABEL: define noundef double @_Z15test_min_double
-// CHECK: call double @llvm.minnum.f64(
+// CHECK-LABEL: define noundef nofpclass(nan inf) double @_Z15test_min_double
+// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.minnum.f64(
 double test_min_double(double p0, double p1) { return min(p0, p1); }
-// CHECK-LABEL: define noundef <2 x double> @_Z16test_min_double2
-// CHECK: call <2 x double> @llvm.minnum.v2f64
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x double> @_Z16test_min_double2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.minnum.v2f64
 double2 test_min_double2(double2 p0, double2 p1) { return min(p0, p1); }
-// CHECK-LABEL: define noundef <3 x double> @_Z16test_min_double3
-// CHECK: call <3 x double> @llvm.minnum.v3f64
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x double> @_Z16test_min_double3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.minnum.v3f64
 double3 test_min_double3(double3 p0, double3 p1) { return min(p0, p1); }
-// CHECK-LABEL: define noundef <4 x double> @_Z16test_min_double4
-// CHECK: call <4 x double> @llvm.minnum.v4f64
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x double> @_Z16test_min_double4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.minnum.v4f64
 double4 test_min_double4(double4 p0, double4 p1) { return min(p0, p1); }
diff --git a/clang/test/CodeGenHLSL/builtins/normalize.hlsl b/clang/test/CodeGenHLSL/builtins/normalize.hlsl
index 83ad607c14a60..830fc26b7acf0 100644
--- a/clang/test/CodeGenHLSL/builtins/normalize.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/normalize.hlsl
@@ -2,24 +2,24 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 
 // NATIVE_HALF: define [[FNATTRS]] half @
-// NATIVE_HALF: call half @llvm.[[TARGET]].normalize.f16(half
-// NO_HALF: call float @llvm.[[TARGET]].normalize.f32(float
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].normalize.f16(half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].normalize.f32(float
 // NATIVE_HALF: ret half
 // NO_HALF: ret float
 half test_normalize_half(half p0)
@@ -27,8 +27,8 @@ half test_normalize_half(half p0)
     return normalize(p0);
 }
 // NATIVE_HALF: define [[FNATTRS]] <2 x half> @
-// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].normalize.v2f16(<2 x half>
-// NO_HALF: call <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float>
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].normalize.v2f16(<2 x half>
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float>
 // NATIVE_HALF: ret <2 x half> %hlsl.normalize
 // NO_HALF: ret <2 x float> %hlsl.normalize
 half2 test_normalize_half2(half2 p0)
@@ -36,8 +36,8 @@ half2 test_normalize_half2(half2 p0)
     return normalize(p0);
 }
 // NATIVE_HALF: define [[FNATTRS]] <3 x half> @
-// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].normalize.v3f16(<3 x half>
-// NO_HALF: call <3 x float> @llvm.[[TARGET]].normalize.v3f32(<3 x float>
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].normalize.v3f16(<3 x half>
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].normalize.v3f32(<3 x float>
 // NATIVE_HALF: ret <3 x half> %hlsl.normalize
 // NO_HALF: ret <3 x float> %hlsl.normalize
 half3 test_normalize_half3(half3 p0)
@@ -45,8 +45,8 @@ half3 test_normalize_half3(half3 p0)
     return normalize(p0);
 }
 // NATIVE_HALF: define [[FNATTRS]] <4 x half> @
-// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].normalize.v4f16(<4 x half>
-// NO_HALF: call <4 x float> @llvm.[[TARGET]].normalize.v4f32(<4 x float>
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].normalize.v4f16(<4 x half>
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].normalize.v4f32(<4 x float>
 // NATIVE_HALF: ret <4 x half> %hlsl.normalize
 // NO_HALF: ret <4 x float> %hlsl.normalize
 half4 test_normalize_half4(half4 p0)
@@ -55,14 +55,14 @@ half4 test_normalize_half4(half4 p0)
 }
 
 // CHECK: define [[FNATTRS]] float @
-// CHECK: call float @llvm.[[TARGET]].normalize.f32(float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].normalize.f32(float
 // CHECK: ret float
 float test_normalize_float(float p0)
 {
     return normalize(p0);
 }
 // CHECK: define [[FNATTRS]] <2 x float> @
-// CHECK: %hlsl.normalize = call <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float>
+// CHECK: %hlsl.normalize = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].normalize.v2f32(<2 x float>
 
 // CHECK: ret <2 x float> %hlsl.normalize
 float2 test_normalize_float2(float2 p0)
@@ -70,14 +70,14 @@ float2 test_normalize_float2(float2 p0)
     return normalize(p0);
 }
 // CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %hlsl.normalize = call <3 x float> @llvm.[[TARGET]].normalize.v3f32(
+// CHECK: %hlsl.normalize = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].normalize.v3f32(
 // CHECK: ret <3 x float> %hlsl.normalize
 float3 test_normalize_float3(float3 p0)
 {
     return normalize(p0);
 }
 // CHECK: define [[FNATTRS]] <4 x float> @
-// CHECK: %hlsl.normalize = call <4 x float> @llvm.[[TARGET]].normalize.v4f32(
+// CHECK: %hlsl.normalize = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].normalize.v4f32(
 // CHECK: ret <4 x float> %hlsl.normalize
 float4 test_length_float4(float4 p0)
 {
diff --git a/clang/test/CodeGenHLSL/builtins/pow.hlsl b/clang/test/CodeGenHLSL/builtins/pow.hlsl
index 4e18480763343..fd21f1b94c57e 100644
--- a/clang/test/CodeGenHLSL/builtins/pow.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/pow.hlsl
@@ -5,36 +5,36 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z13test_pow_half
-// NATIVE_HALF: call half @llvm.pow.f16(
-// NO_HALF-LABEL: define noundef float @_Z13test_pow_half
-// NO_HALF: call float @llvm.pow.f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_pow_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.pow.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_pow_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.pow.f32(
 half test_pow_half(half p0, half p1) { return pow(p0, p1); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_pow_half2
-// NATIVE_HALF: call <2 x half> @llvm.pow.v2f16
-// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_pow_half2
-// NO_HALF: call <2 x float> @llvm.pow.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_pow_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.pow.v2f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_pow_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.pow.v2f32(
 half2 test_pow_half2(half2 p0, half2 p1) { return pow(p0, p1); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_pow_half3
-// NATIVE_HALF: call <3 x half> @llvm.pow.v3f16
-// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_pow_half3
-// NO_HALF: call <3 x float> @llvm.pow.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_pow_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.pow.v3f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_pow_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.pow.v3f32(
 half3 test_pow_half3(half3 p0, half3 p1) { return pow(p0, p1); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_pow_half4
-// NATIVE_HALF: call <4 x half> @llvm.pow.v4f16
-// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_pow_half4
-// NO_HALF: call <4 x float> @llvm.pow.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_pow_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.pow.v4f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_pow_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.pow.v4f32(
 half4 test_pow_half4(half4 p0, half4 p1) { return pow(p0, p1); }
 
-// CHECK-LABEL: define noundef float @_Z14test_pow_float
-// CHECK: call float @llvm.pow.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_pow_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.pow.f32(
 float test_pow_float(float p0, float p1) { return pow(p0, p1); }
-// CHECK-LABEL: define noundef <2 x float> @_Z15test_pow_float2
-// CHECK: call <2 x float> @llvm.pow.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_pow_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.pow.v2f32
 float2 test_pow_float2(float2 p0, float2 p1) { return pow(p0, p1); }
-// CHECK-LABEL: define noundef <3 x float> @_Z15test_pow_float3
-// CHECK: call <3 x float> @llvm.pow.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_pow_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.pow.v3f32
 float3 test_pow_float3(float3 p0, float3 p1) { return pow(p0, p1); }
-// CHECK-LABEL: define noundef <4 x float> @_Z15test_pow_float4
-// CHECK: call <4 x float> @llvm.pow.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_pow_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.pow.v4f32
 float4 test_pow_float4(float4 p0, float4 p1) { return pow(p0, p1); }
diff --git a/clang/test/CodeGenHLSL/builtins/radians.hlsl b/clang/test/CodeGenHLSL/builtins/radians.hlsl
index 774300525dbf0..b2b6190ea90f6 100644
--- a/clang/test/CodeGenHLSL/builtins/radians.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/radians.hlsl
@@ -2,65 +2,65 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DTARGET=dx -DFNATTRS=noundef
+// RUN:   -DTARGET=dx -DFNATTRS="noundef nofpclass(nan inf)"
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DTARGET=dx -DFNATTRS=noundef
+// RUN:   -DTARGET=dx -DFNATTRS="noundef nofpclass(nan inf)"
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DTARGET=spv -DFNATTRS="spir_func noundef"
+// RUN:   -DTARGET=spv -DFNATTRS="spir_func noundef nofpclass(nan inf)"
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DTARGET=spv -DFNATTRS="spir_func noundef"
+// RUN:   -DTARGET=spv -DFNATTRS="spir_func noundef nofpclass(nan inf)"
 
 
 // NATIVE_HALF: define [[FNATTRS]] half @
-// NATIVE_HALF: %{{.*}} = call half @llvm.[[TARGET]].radians.f16(
+// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].radians.f16(
 // NATIVE_HALF: ret half %{{.*}}
 // NO_HALF: define [[FNATTRS]] float @
-// NO_HALF: %{{.*}} = call float @llvm.[[TARGET]].radians.f32(
+// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].radians.f32(
 // NO_HALF: ret float %{{.*}}
 half test_radians_half(half p0) { return radians(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <2 x half> @
-// NATIVE_HALF: %{{.*}} = call <2 x half> @llvm.[[TARGET]].radians.v2f16
+// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].radians.v2f16
 // NATIVE_HALF: ret <2 x half> %{{.*}}
 // NO_HALF: define [[FNATTRS]] <2 x float> @
-// NO_HALF: %{{.*}} = call <2 x float> @llvm.[[TARGET]].radians.v2f32(
+// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].radians.v2f32(
 // NO_HALF: ret <2 x float> %{{.*}}
 half2 test_radians_half2(half2 p0) { return radians(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <3 x half> @
-// NATIVE_HALF: %{{.*}} = call <3 x half> @llvm.[[TARGET]].radians.v3f16
+// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].radians.v3f16
 // NATIVE_HALF: ret <3 x half> %{{.*}}
 // NO_HALF: define [[FNATTRS]] <3 x float> @
-// NO_HALF: %{{.*}} = call <3 x float> @llvm.[[TARGET]].radians.v3f32(
+// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].radians.v3f32(
 // NO_HALF: ret <3 x float> %{{.*}}
 half3 test_radians_half3(half3 p0) { return radians(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <4 x half> @
-// NATIVE_HALF: %{{.*}} = call <4 x half> @llvm.[[TARGET]].radians.v4f16
+// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].radians.v4f16
 // NATIVE_HALF: ret <4 x half> %{{.*}}
 // NO_HALF: define [[FNATTRS]] <4 x float> @
-// NO_HALF: %{{.*}} = call <4 x float> @llvm.[[TARGET]].radians.v4f32(
+// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].radians.v4f32(
 // NO_HALF: ret <4 x float> %{{.*}}
 half4 test_radians_half4(half4 p0) { return radians(p0); }
 
 // CHECK: define [[FNATTRS]] float @
-// CHECK: %{{.*}} = call float @llvm.[[TARGET]].radians.f32(
+// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].radians.f32(
 // CHECK: ret float %{{.*}}
 float test_radians_float(float p0) { return radians(p0); }
 // CHECK: define [[FNATTRS]] <2 x float> @
-// CHECK: %{{.*}} = call <2 x float> @llvm.[[TARGET]].radians.v2f32
+// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].radians.v2f32
 // CHECK: ret <2 x float> %{{.*}}
 float2 test_radians_float2(float2 p0) { return radians(p0); }
 // CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %{{.*}} = call <3 x float> @llvm.[[TARGET]].radians.v3f32
+// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].radians.v3f32
 // CHECK: ret <3 x float> %{{.*}}
 float3 test_radians_float3(float3 p0) { return radians(p0); }
 // CHECK: define [[FNATTRS]] <4 x float> @
-// CHECK: %{{.*}} = call <4 x float> @llvm.[[TARGET]].radians.v4f32
+// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].radians.v4f32
 // CHECK: ret <4 x float> %{{.*}}
 float4 test_radians_float4(float4 p0) { return radians(p0); }
 
diff --git a/clang/test/CodeGenHLSL/builtins/rcp.hlsl b/clang/test/CodeGenHLSL/builtins/rcp.hlsl
index 83fe33406c7c8..8f07f3a031531 100644
--- a/clang/test/CodeGenHLSL/builtins/rcp.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/rcp.hlsl
@@ -13,90 +13,90 @@
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_NO_HALF,SPIR_CHECK
 
-// DXIL_NATIVE_HALF: define noundef half @
-// SPIR_NATIVE_HALF: define spir_func noundef half @
-// NATIVE_HALF: %hlsl.rcp = fdiv half 0xH3C00, %{{.*}} 
+// DXIL_NATIVE_HALF: define noundef nofpclass(nan inf) half @
+// SPIR_NATIVE_HALF: define spir_func noundef nofpclass(nan inf) half @
+// NATIVE_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn half 0xH3C00, %{{.*}} 
 // NATIVE_HALF: ret half %hlsl.rcp
-// DXIL_NO_HALF: define noundef float @
-// SPIR_NO_HALF: define spir_func noundef float @
-// NO_HALF: %hlsl.rcp = fdiv float 1.000000e+00, %{{.*}}
+// DXIL_NO_HALF: define noundef nofpclass(nan inf) float @
+// SPIR_NO_HALF: define spir_func noundef nofpclass(nan inf) float @
+// NO_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn float 1.000000e+00, %{{.*}}
 // NO_HALF: ret float %hlsl.rcp
 half test_rcp_half(half p0) { return rcp(p0); }
 
-// DXIL_NATIVE_HALF: define noundef <2 x half> @
-// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @
-// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> splat (half  0xH3C00), %{{.*}} 
+// DXIL_NATIVE_HALF: define noundef nofpclass(nan inf) <2 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef nofpclass(nan inf) <2 x half> @
+// NATIVE_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <2 x half> splat (half  0xH3C00), %{{.*}} 
 // NATIVE_HALF: ret <2 x half> %hlsl.rcp
-// DXIL_NO_HALF: define noundef <2 x float> @
-// SPIR_NO_HALF: define spir_func noundef <2 x float> @
-// NO_HALF: %hlsl.rcp = fdiv <2 x float> splat (float 1.000000e+00), %{{.*}}
+// DXIL_NO_HALF: define noundef nofpclass(nan inf) <2 x float> @
+// SPIR_NO_HALF: define spir_func noundef nofpclass(nan inf) <2 x float> @
+// NO_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <2 x float> splat (float 1.000000e+00), %{{.*}}
 // NO_HALF: ret <2 x float> %hlsl.rcp
 half2 test_rcp_half2(half2 p0) { return rcp(p0); }
 
-// DXIL_NATIVE_HALF: define noundef <3 x half> @
-// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @
-// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> splat (half  0xH3C00), %{{.*}} 
+// DXIL_NATIVE_HALF: define noundef nofpclass(nan inf) <3 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef nofpclass(nan inf) <3 x half> @
+// NATIVE_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <3 x half> splat (half  0xH3C00), %{{.*}} 
 // NATIVE_HALF: ret <3 x half> %hlsl.rcp
-// DXIL_NO_HALF: define noundef <3 x float> @
-// SPIR_NO_HALF: define spir_func noundef <3 x float> @
-// NO_HALF: %hlsl.rcp = fdiv <3 x float> splat (float 1.000000e+00), %{{.*}}
+// DXIL_NO_HALF: define noundef nofpclass(nan inf) <3 x float> @
+// SPIR_NO_HALF: define spir_func noundef nofpclass(nan inf) <3 x float> @
+// NO_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <3 x float> splat (float 1.000000e+00), %{{.*}}
 // NO_HALF: ret <3 x float> %hlsl.rcp
 half3 test_rcp_half3(half3 p0) { return rcp(p0); }
 
-// DXIL_NATIVE_HALF: define noundef <4 x half> @
-// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @
-// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> splat (half  0xH3C00), %{{.*}} 
+// DXIL_NATIVE_HALF: define noundef nofpclass(nan inf) <4 x half> @
+// SPIR_NATIVE_HALF: define spir_func noundef nofpclass(nan inf) <4 x half> @
+// NATIVE_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <4 x half> splat (half  0xH3C00), %{{.*}} 
 // NATIVE_HALF: ret <4 x half> %hlsl.rcp
-// DXIL_NO_HALF: define noundef <4 x float> @
-// SPIR_NO_HALF: define spir_func noundef <4 x float> @
-// NO_HALF: %hlsl.rcp = fdiv <4 x float> splat (float 1.000000e+00), %{{.*}}
+// DXIL_NO_HALF: define noundef nofpclass(nan inf) <4 x float> @
+// SPIR_NO_HALF: define spir_func noundef nofpclass(nan inf) <4 x float> @
+// NO_HALF: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <4 x float> splat (float 1.000000e+00), %{{.*}}
 // NO_HALF: ret <4 x float> %hlsl.rcp
 half4 test_rcp_half4(half4 p0) { return rcp(p0); }
 
-// DXIL_CHECK: define noundef float @
-// SPIR_CHECK: define spir_func noundef float @
-// CHECK: %hlsl.rcp = fdiv float 1.000000e+00, %{{.*}}
+// DXIL_CHECK: define noundef nofpclass(nan inf) float @
+// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) float @
+// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn float 1.000000e+00, %{{.*}}
 // CHECK: ret float %hlsl.rcp
 float test_rcp_float(float p0) { return rcp(p0); }
 
-// DXIL_CHECK: define noundef <2 x float> @
-// SPIR_CHECK: define spir_func noundef <2 x float> @
-// CHECK: %hlsl.rcp = fdiv <2 x float> splat (float 1.000000e+00), %{{.*}}
+// DXIL_CHECK: define noundef nofpclass(nan inf) <2 x float> @
+// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <2 x float> @
+// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <2 x float> splat (float 1.000000e+00), %{{.*}}
 // CHECK: ret <2 x float> %hlsl.rcp
 float2 test_rcp_float2(float2 p0) { return rcp(p0); }
 
-// DXIL_CHECK: define noundef <3 x float> @
-// SPIR_CHECK: define spir_func noundef <3 x float> @
-// CHECK: %hlsl.rcp = fdiv <3 x float> splat (float 1.000000e+00), %{{.*}}
+// DXIL_CHECK: define noundef nofpclass(nan inf) <3 x float> @
+// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <3 x float> @
+// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <3 x float> splat (float 1.000000e+00), %{{.*}}
 // CHECK: ret <3 x float> %hlsl.rcp
 float3 test_rcp_float3(float3 p0) { return rcp(p0); }
 
-// DXIL_CHECK: define noundef <4 x float> @
-// SPIR_CHECK: define spir_func noundef <4 x float> @
-// CHECK: %hlsl.rcp = fdiv <4 x float> splat (float 1.000000e+00), %{{.*}}
+// DXIL_CHECK: define noundef nofpclass(nan inf) <4 x float> @
+// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <4 x float> @
+// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <4 x float> splat (float 1.000000e+00), %{{.*}}
 // CHECK: ret <4 x float> %hlsl.rcp
 float4 test_rcp_float4(float4 p0) { return rcp(p0); }
 
-// DXIL_CHECK: define noundef double @
-// SPIR_CHECK: define spir_func noundef double @
-// CHECK: %hlsl.rcp = fdiv double 1.000000e+00, %{{.*}} 
+// DXIL_CHECK: define noundef nofpclass(nan inf) double @
+// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) double @
+// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn double 1.000000e+00, %{{.*}} 
 // CHECK: ret double %hlsl.rcp
 double test_rcp_double(double p0) { return rcp(p0); }
 
-// DXIL_CHECK: define noundef <2 x double> @
-// SPIR_CHECK: define spir_func noundef <2 x double> @
-// CHECK: %hlsl.rcp = fdiv <2 x double> splat (double 1.000000e+00), %{{.*}}
+// DXIL_CHECK: define noundef nofpclass(nan inf) <2 x double> @
+// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <2 x double> @
+// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <2 x double> splat (double 1.000000e+00), %{{.*}}
 // CHECK: ret <2 x double> %hlsl.rcp
 double2 test_rcp_double2(double2 p0) { return rcp(p0); }
 
-// DXIL_CHECK: define noundef <3 x double> @
-// SPIR_CHECK: define spir_func noundef <3 x double> @
-// CHECK: %hlsl.rcp = fdiv <3 x double> splat (double 1.000000e+00), %{{.*}}
+// DXIL_CHECK: define noundef nofpclass(nan inf) <3 x double> @
+// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <3 x double> @
+// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <3 x double> splat (double 1.000000e+00), %{{.*}}
 // CHECK: ret <3 x double> %hlsl.rcp
 double3 test_rcp_double3(double3 p0) { return rcp(p0); }
 
-// DXIL_CHECK: define noundef <4 x double> @
-// SPIR_CHECK: define spir_func noundef <4 x double> @
-// CHECK: %hlsl.rcp = fdiv <4 x double> splat (double 1.000000e+00), %{{.*}}
+// DXIL_CHECK: define noundef nofpclass(nan inf) <4 x double> @
+// SPIR_CHECK: define spir_func noundef nofpclass(nan inf) <4 x double> @
+// CHECK: %hlsl.rcp = fdiv reassoc nnan ninf nsz arcp afn <4 x double> splat (double 1.000000e+00), %{{.*}}
 // CHECK: ret <4 x double> %hlsl.rcp
 double4 test_rcp_double4(double4 p0) { return rcp(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/round.hlsl b/clang/test/CodeGenHLSL/builtins/round.hlsl
index 6da63a394a8fd..a945a9677abbb 100644
--- a/clang/test/CodeGenHLSL/builtins/round.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/round.hlsl
@@ -5,48 +5,48 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z15test_round_half
-// NATIVE_HALF: %elt.roundeven = call half @llvm.roundeven.f16(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z15test_round_half
+// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn half @llvm.roundeven.f16(
 // NATIVE_HALF: ret half %elt.roundeven
-// NO_HALF-LABEL: define noundef float @_Z15test_round_half
-// NO_HALF: %elt.roundeven = call float @llvm.roundeven.f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z15test_round_half
+// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32(
 // NO_HALF: ret float %elt.roundeven
 half test_round_half(half p0) { return round(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z16test_round_half2
-// NATIVE_HALF: %elt.roundeven = call <2 x half> @llvm.roundeven.v2f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z16test_round_half2
+// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.roundeven.v2f16
 // NATIVE_HALF: ret <2 x half> %elt.roundeven
-// NO_HALF-LABEL: define noundef <2 x float> @_Z16test_round_half2
-// NO_HALF: %elt.roundeven = call <2 x float> @llvm.roundeven.v2f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_round_half2
+// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32(
 // NO_HALF: ret <2 x float> %elt.roundeven
 half2 test_round_half2(half2 p0) { return round(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z16test_round_half3
-// NATIVE_HALF: %elt.roundeven = call <3 x half> @llvm.roundeven.v3f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z16test_round_half3
+// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.roundeven.v3f16
 // NATIVE_HALF: ret <3 x half> %elt.roundeven
-// NO_HALF-LABEL: define noundef <3 x float> @_Z16test_round_half3
-// NO_HALF: %elt.roundeven = call <3 x float> @llvm.roundeven.v3f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_round_half3
+// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32(
 // NO_HALF: ret <3 x float> %elt.roundeven
 half3 test_round_half3(half3 p0) { return round(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z16test_round_half4
-// NATIVE_HALF: %elt.roundeven = call <4 x half> @llvm.roundeven.v4f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z16test_round_half4
+// NATIVE_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.roundeven.v4f16
 // NATIVE_HALF: ret <4 x half> %elt.roundeven
-// NO_HALF-LABEL: define noundef <4 x float> @_Z16test_round_half4
-// NO_HALF: %elt.roundeven = call <4 x float> @llvm.roundeven.v4f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_round_half4
+// NO_HALF: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32(
 // NO_HALF: ret <4 x float> %elt.roundeven
 half4 test_round_half4(half4 p0) { return round(p0); }
 
-// CHECK-LABEL: define noundef float @_Z16test_round_float
-// CHECK: %elt.roundeven = call float @llvm.roundeven.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z16test_round_float
+// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn float @llvm.roundeven.f32(
 // CHECK: ret float %elt.roundeven
 float test_round_float(float p0) { return round(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z17test_round_float2
-// CHECK: %elt.roundeven = call <2 x float> @llvm.roundeven.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z17test_round_float2
+// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.roundeven.v2f32
 // CHECK: ret <2 x float> %elt.roundeven
 float2 test_round_float2(float2 p0) { return round(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z17test_round_float3
-// CHECK: %elt.roundeven = call <3 x float> @llvm.roundeven.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z17test_round_float3
+// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.roundeven.v3f32
 // CHECK: ret <3 x float> %elt.roundeven
 float3 test_round_float3(float3 p0) { return round(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z17test_round_float4
-// CHECK: %elt.roundeven = call <4 x float> @llvm.roundeven.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z17test_round_float4
+// CHECK: %elt.roundeven = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.roundeven.v4f32
 // CHECK: ret <4 x float> %elt.roundeven
 float4 test_round_float4(float4 p0) { return round(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl b/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl
index b1b53fc187da6..6c9b1f643713b 100644
--- a/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/rsqrt.hlsl
@@ -2,63 +2,63 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 
 // NATIVE_HALF: define [[FNATTRS]] half @
-// NATIVE_HALF: %hlsl.rsqrt = call half @llvm.[[TARGET]].rsqrt.f16(
+// NATIVE_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].rsqrt.f16(
 // NATIVE_HALF: ret half %hlsl.rsqrt
 // NO_HALF: define [[FNATTRS]] float @
-// NO_HALF: %hlsl.rsqrt = call float @llvm.[[TARGET]].rsqrt.f32(
+// NO_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].rsqrt.f32(
 // NO_HALF: ret float %hlsl.rsqrt
 half test_rsqrt_half(half p0) { return rsqrt(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <2 x half> @
-// NATIVE_HALF: %hlsl.rsqrt = call <2 x half> @llvm.[[TARGET]].rsqrt.v2f16
+// NATIVE_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].rsqrt.v2f16
 // NATIVE_HALF: ret <2 x half> %hlsl.rsqrt
 // NO_HALF: define [[FNATTRS]] <2 x float> @
-// NO_HALF: %hlsl.rsqrt = call <2 x float> @llvm.[[TARGET]].rsqrt.v2f32(
+// NO_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].rsqrt.v2f32(
 // NO_HALF: ret <2 x float> %hlsl.rsqrt
 half2 test_rsqrt_half2(half2 p0) { return rsqrt(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <3 x half> @
-// NATIVE_HALF: %hlsl.rsqrt = call <3 x half> @llvm.[[TARGET]].rsqrt.v3f16
+// NATIVE_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].rsqrt.v3f16
 // NATIVE_HALF: ret <3 x half> %hlsl.rsqrt
 // NO_HALF: define [[FNATTRS]] <3 x float> @
-// NO_HALF: %hlsl.rsqrt = call <3 x float> @llvm.[[TARGET]].rsqrt.v3f32(
+// NO_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].rsqrt.v3f32(
 // NO_HALF: ret <3 x float> %hlsl.rsqrt
 half3 test_rsqrt_half3(half3 p0) { return rsqrt(p0); }
 // NATIVE_HALF: define [[FNATTRS]] <4 x half> @
-// NATIVE_HALF: %hlsl.rsqrt = call <4 x half> @llvm.[[TARGET]].rsqrt.v4f16
+// NATIVE_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].rsqrt.v4f16
 // NATIVE_HALF: ret <4 x half> %hlsl.rsqrt
 // NO_HALF: define [[FNATTRS]] <4 x float> @
-// NO_HALF: %hlsl.rsqrt = call <4 x float> @llvm.[[TARGET]].rsqrt.v4f32(
+// NO_HALF: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].rsqrt.v4f32(
 // NO_HALF: ret <4 x float> %hlsl.rsqrt
 half4 test_rsqrt_half4(half4 p0) { return rsqrt(p0); }
 
 // CHECK: define [[FNATTRS]] float @
-// CHECK: %hlsl.rsqrt = call float @llvm.[[TARGET]].rsqrt.f32(
+// CHECK: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].rsqrt.f32(
 // CHECK: ret float %hlsl.rsqrt
 float test_rsqrt_float(float p0) { return rsqrt(p0); }
 // CHECK: define [[FNATTRS]] <2 x float> @
-// CHECK: %hlsl.rsqrt = call <2 x float> @llvm.[[TARGET]].rsqrt.v2f32
+// CHECK: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].rsqrt.v2f32
 // CHECK: ret <2 x float> %hlsl.rsqrt
 float2 test_rsqrt_float2(float2 p0) { return rsqrt(p0); }
 // CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %hlsl.rsqrt = call <3 x float> @llvm.[[TARGET]].rsqrt.v3f32
+// CHECK: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].rsqrt.v3f32
 // CHECK: ret <3 x float> %hlsl.rsqrt
 float3 test_rsqrt_float3(float3 p0) { return rsqrt(p0); }
 // CHECK: define [[FNATTRS]] <4 x float> @
-// CHECK: %hlsl.rsqrt = call <4 x float> @llvm.[[TARGET]].rsqrt.v4f32
+// CHECK: %hlsl.rsqrt = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].rsqrt.v4f32
 // CHECK: ret <4 x float> %hlsl.rsqrt
 float4 test_rsqrt_float4(float4 p0) { return rsqrt(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/saturate.hlsl b/clang/test/CodeGenHLSL/builtins/saturate.hlsl
index c221f6e0f2c36..3304073d9b501 100644
--- a/clang/test/CodeGenHLSL/builtins/saturate.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/saturate.hlsl
@@ -13,48 +13,48 @@
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF -Dtar=spv
 
 // NATIVE_HALF-LABEL: define{{.*}} half @_Z18test_saturate_halfDh
-// NATIVE_HALF: call half @llvm.[[tar]].saturate.f16(
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.[[tar]].saturate.f16(
 // NO_HALF-LABEL: define{{.*}} float @_Z18test_saturate_halfDh
-// NO_HALF: call float @llvm.[[tar]].saturate.f32(
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.[[tar]].saturate.f32(
 half test_saturate_half(half p0) { return saturate(p0); }
 // NATIVE_HALF-LABEL: define{{.*}} <2 x half> @_Z19test_saturate_half2Dv2_Dh
-// NATIVE_HALF: call <2 x half> @llvm.[[tar]].saturate.v2f16
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[tar]].saturate.v2f16
 // NO_HALF-LABEL: define{{.*}} <2 x float> @_Z19test_saturate_half2Dv2_Dh
-// NO_HALF: call <2 x float> @llvm.[[tar]].saturate.v2f32(
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[tar]].saturate.v2f32(
 half2 test_saturate_half2(half2 p0) { return saturate(p0); }
 // NATIVE_HALF-LABEL: define{{.*}} <3 x half> @_Z19test_saturate_half3Dv3_Dh(
-// NATIVE_HALF: call <3 x half> @llvm.[[tar]].saturate.v3f16
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[tar]].saturate.v3f16
 // NO_HALF-LABEL: define{{.*}} <3 x float> @_Z19test_saturate_half3Dv3_Dh(<3 x float>
-// NO_HALF: call <3 x float> @llvm.[[tar]].saturate.v3f32(
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[tar]].saturate.v3f32(
 half3 test_saturate_half3(half3 p0) { return saturate(p0); }
 // NATIVE_HALF-LABEL: define{{.*}} <4 x half> @_Z19test_saturate_half4Dv4_Dh(
-// NATIVE_HALF: call <4 x half> @llvm.[[tar]].saturate.v4f16
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[tar]].saturate.v4f16
 // NO_HALF-LABEL: define{{.*}} <4 x float> @_Z19test_saturate_half4Dv4_Dh(<4 x float>
-// NO_HALF: call <4 x float> @llvm.[[tar]].saturate.v4f32(
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[tar]].saturate.v4f32(
 half4 test_saturate_half4(half4 p0) { return saturate(p0); }
 
 // CHECK-LABEL: define{{.*}} float @_Z19test_saturate_floatf(
-// CHECK: call float @llvm.[[tar]].saturate.f32(
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.[[tar]].saturate.f32(
 float test_saturate_float(float p0) { return saturate(p0); }
 // CHECK-LABEL: define{{.*}} <2 x float> @_Z20test_saturate_float2Dv2_f(<2 x float>
-// CHECK: call <2 x float> @llvm.[[tar]].saturate.v2f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[tar]].saturate.v2f32
 float2 test_saturate_float2(float2 p0) { return saturate(p0); }
 // CHECK-LABEL: define{{.*}} <3 x float> @_Z20test_saturate_float3Dv3_f(<3 x float>
-// CHECK: call <3 x float> @llvm.[[tar]].saturate.v3f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[tar]].saturate.v3f32
 float3 test_saturate_float3(float3 p0) { return saturate(p0); }
 // CHECK-LABEL: define{{.*}} <4 x float> @_Z20test_saturate_float4Dv4_f(<4 x float>
-// CHECK: call <4 x float> @llvm.[[tar]].saturate.v4f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[tar]].saturate.v4f32
 float4 test_saturate_float4(float4 p0) { return saturate(p0); }
 
 // CHECK-LABEL: define{{.*}} double @_Z20test_saturate_doubled(double
-// CHECK: call double @llvm.[[tar]].saturate.f64(
+// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.[[tar]].saturate.f64(
 double test_saturate_double(double p0) { return saturate(p0); }
 // CHECK-LABEL: define{{.*}} <2 x double> @_Z21test_saturate_double2Dv2_d(<2 x double>
-// CHECK: call <2 x double> @llvm.[[tar]].saturate.v2f64
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.[[tar]].saturate.v2f64
 double2 test_saturate_double2(double2 p0) { return saturate(p0); }
 // CHECK-LABEL: define{{.*}} <3 x double> @_Z21test_saturate_double3Dv3_d(<3 x double>
-// CHECK: call <3 x double> @llvm.[[tar]].saturate.v3f64
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.[[tar]].saturate.v3f64
 double3 test_saturate_double3(double3 p0) { return saturate(p0); }
 // CHECK-LABEL: define{{.*}} <4 x double> @_Z21test_saturate_double4Dv4_d(<4 x double>
-// CHECK: call <4 x double> @llvm.[[tar]].saturate.v4f64
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[tar]].saturate.v4f64
 double4 test_saturate_double4(double4 p0) { return saturate(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/sin.hlsl b/clang/test/CodeGenHLSL/builtins/sin.hlsl
index 9f7fa5043bdc7..69c657239ef95 100644
--- a/clang/test/CodeGenHLSL/builtins/sin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/sin.hlsl
@@ -5,36 +5,36 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z13test_sin_half
-// NATIVE_HALF: call half @llvm.sin.f16(
-// NO_HALF-LABEL: define noundef float @_Z13test_sin_half
-// NO_HALF: call float @llvm.sin.f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z13test_sin_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.sin.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z13test_sin_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.sin.f32(
 half test_sin_half(half p0) { return sin(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z14test_sin_half2
-// NATIVE_HALF: call <2 x half> @llvm.sin.v2f16
-// NO_HALF-LABEL: define noundef <2 x float> @_Z14test_sin_half2
-// NO_HALF: call <2 x float> @llvm.sin.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z14test_sin_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.sin.v2f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z14test_sin_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sin.v2f32(
 half2 test_sin_half2(half2 p0) { return sin(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z14test_sin_half3
-// NATIVE_HALF: call <3 x half> @llvm.sin.v3f16
-// NO_HALF-LABEL: define noundef <3 x float> @_Z14test_sin_half3
-// NO_HALF: call <3 x float> @llvm.sin.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z14test_sin_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.sin.v3f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z14test_sin_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sin.v3f32(
 half3 test_sin_half3(half3 p0) { return sin(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z14test_sin_half4
-// NATIVE_HALF: call <4 x half> @llvm.sin.v4f16
-// NO_HALF-LABEL: define noundef <4 x float> @_Z14test_sin_half4
-// NO_HALF: call <4 x float> @llvm.sin.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z14test_sin_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.sin.v4f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z14test_sin_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sin.v4f32(
 half4 test_sin_half4(half4 p0) { return sin(p0); }
 
-// CHECK-LABEL: define noundef float @_Z14test_sin_float
-// CHECK: call float @llvm.sin.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z14test_sin_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.sin.f32(
 float test_sin_float(float p0) { return sin(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z15test_sin_float2
-// CHECK: call <2 x float> @llvm.sin.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_sin_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sin.v2f32
 float2 test_sin_float2(float2 p0) { return sin(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z15test_sin_float3
-// CHECK: call <3 x float> @llvm.sin.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_sin_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sin.v3f32
 float3 test_sin_float3(float3 p0) { return sin(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z15test_sin_float4
-// CHECK: call <4 x float> @llvm.sin.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_sin_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sin.v4f32
 float4 test_sin_float4(float4 p0) { return sin(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/sinh.hlsl b/clang/test/CodeGenHLSL/builtins/sinh.hlsl
index 167f6f2242235..d55d60515418c 100644
--- a/clang/test/CodeGenHLSL/builtins/sinh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/sinh.hlsl
@@ -7,53 +7,53 @@
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
 
 // CHECK-LABEL: test_sinh_half
-// NATIVE_HALF: call half @llvm.sinh.f16
-// NO_HALF: call float @llvm.sinh.f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.sinh.f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.sinh.f32
 half test_sinh_half ( half p0 ) {
   return sinh ( p0 );
 }
 
 // CHECK-LABEL: test_sinh_half2
-// NATIVE_HALF: call <2 x half> @llvm.sinh.v2f16
-// NO_HALF: call <2 x float> @llvm.sinh.v2f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.sinh.v2f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sinh.v2f32
 half2 test_sinh_half2 ( half2 p0 ) {
   return sinh ( p0 );
 }
 
 // CHECK-LABEL: test_sinh_half3
-// NATIVE_HALF: call <3 x half> @llvm.sinh.v3f16
-// NO_HALF: call <3 x float> @llvm.sinh.v3f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.sinh.v3f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sinh.v3f32
 half3 test_sinh_half3 ( half3 p0 ) {
   return sinh ( p0 );
 }
 
 // CHECK-LABEL: test_sinh_half4
-// NATIVE_HALF: call <4 x half> @llvm.sinh.v4f16
-// NO_HALF: call <4 x float> @llvm.sinh.v4f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.sinh.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sinh.v4f32
 half4 test_sinh_half4 ( half4 p0 ) {
   return sinh ( p0 );
 }
 
 // CHECK-LABEL: test_sinh_float
-// CHECK: call float @llvm.sinh.f32
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.sinh.f32
 float test_sinh_float ( float p0 ) {
   return sinh ( p0 );
 }
 
 // CHECK-LABEL: test_sinh_float2
-// CHECK: call <2 x float> @llvm.sinh.v2f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sinh.v2f32
 float2 test_sinh_float2 ( float2 p0 ) {
   return sinh ( p0 );
 }
 
 // CHECK-LABEL: test_sinh_float3
-// CHECK: call <3 x float> @llvm.sinh.v3f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sinh.v3f32
 float3 test_sinh_float3 ( float3 p0 ) {
   return sinh ( p0 );
 }
 
 // CHECK-LABEL: test_sinh_float4
-// CHECK: call <4 x float> @llvm.sinh.v4f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sinh.v4f32
 float4 test_sinh_float4 ( float4 p0 ) {
   return sinh ( p0 );
 }
diff --git a/clang/test/CodeGenHLSL/builtins/sqrt.hlsl b/clang/test/CodeGenHLSL/builtins/sqrt.hlsl
index 63454cea3fe6f..94d966f0bef8a 100644
--- a/clang/test/CodeGenHLSL/builtins/sqrt.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/sqrt.hlsl
@@ -5,48 +5,48 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z14test_sqrt_half
-// NATIVE_HALF: %{{.*}} = call half @llvm.sqrt.f16(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z14test_sqrt_half
+// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn half @llvm.sqrt.f16(
 // NATIVE_HALF: ret half %{{.*}}
-// NO_HALF-LABEL: define noundef float @_Z14test_sqrt_half
-// NO_HALF: %{{.*}} = call float @llvm.sqrt.f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z14test_sqrt_half
+// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn float @llvm.sqrt.f32(
 // NO_HALF: ret float %{{.*}}
 half test_sqrt_half(half p0) { return sqrt(p0); }
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z15test_sqrt_half2
-// NATIVE_HALF: %{{.*}} = call <2 x half> @llvm.sqrt.v2f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z15test_sqrt_half2
+// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.sqrt.v2f16
 // NATIVE_HALF: ret <2 x half> %{{.*}}
-// NO_HALF-LABEL: define noundef <2 x float> @_Z15test_sqrt_half2
-// NO_HALF: %{{.*}} = call <2 x float> @llvm.sqrt.v2f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z15test_sqrt_half2
+// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sqrt.v2f32(
 // NO_HALF: ret <2 x float> %{{.*}}
 half2 test_sqrt_half2(half2 p0) { return sqrt(p0); }
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z15test_sqrt_half3
-// NATIVE_HALF: %{{.*}} = call <3 x half> @llvm.sqrt.v3f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z15test_sqrt_half3
+// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.sqrt.v3f16
 // NATIVE_HALF: ret <3 x half> %{{.*}}
-// NO_HALF-LABEL: define noundef <3 x float> @_Z15test_sqrt_half3
-// NO_HALF: %{{.*}} = call <3 x float> @llvm.sqrt.v3f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z15test_sqrt_half3
+// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sqrt.v3f32(
 // NO_HALF: ret <3 x float> %{{.*}}
 half3 test_sqrt_half3(half3 p0) { return sqrt(p0); }
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z15test_sqrt_half4
-// NATIVE_HALF: %{{.*}} = call <4 x half> @llvm.sqrt.v4f16
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z15test_sqrt_half4
+// NATIVE_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.sqrt.v4f16
 // NATIVE_HALF: ret <4 x half> %{{.*}}
-// NO_HALF-LABEL: define noundef <4 x float> @_Z15test_sqrt_half4
-// NO_HALF: %{{.*}} = call <4 x float> @llvm.sqrt.v4f32(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z15test_sqrt_half4
+// NO_HALF: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sqrt.v4f32(
 // NO_HALF: ret <4 x float> %{{.*}}
 half4 test_sqrt_half4(half4 p0) { return sqrt(p0); }
 
-// CHECK-LABEL: define noundef float @_Z15test_sqrt_float
-// CHECK: %{{.*}} = call float @llvm.sqrt.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z15test_sqrt_float
+// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn float @llvm.sqrt.f32(
 // CHECK: ret float %{{.*}}
 float test_sqrt_float(float p0) { return sqrt(p0); }
-// CHECK-LABEL: define noundef <2 x float> @_Z16test_sqrt_float2
-// CHECK: %{{.*}} = call <2 x float> @llvm.sqrt.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_sqrt_float2
+// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.sqrt.v2f32
 // CHECK: ret <2 x float> %{{.*}}
 float2 test_sqrt_float2(float2 p0) { return sqrt(p0); }
-// CHECK-LABEL: define noundef <3 x float> @_Z16test_sqrt_float3
-// CHECK: %{{.*}} = call <3 x float> @llvm.sqrt.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_sqrt_float3
+// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.sqrt.v3f32
 // CHECK: ret <3 x float> %{{.*}}
 float3 test_sqrt_float3(float3 p0) { return sqrt(p0); }
-// CHECK-LABEL: define noundef <4 x float> @_Z16test_sqrt_float4
-// CHECK: %{{.*}} = call <4 x float> @llvm.sqrt.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_sqrt_float4
+// CHECK: %{{.*}} = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.sqrt.v4f32
 // CHECK: ret <4 x float> %{{.*}}
 float4 test_sqrt_float4(float4 p0) { return sqrt(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/step.hlsl b/clang/test/CodeGenHLSL/builtins/step.hlsl
index 442f4930ca579..49d09e5c6fe6f 100644
--- a/clang/test/CodeGenHLSL/builtins/step.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/step.hlsl
@@ -2,24 +2,24 @@
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS=noundef -DTARGET=dx
+// RUN:   -DFNATTRS="noundef nofpclass(nan inf)" -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
-// RUN:   -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN:   -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTARGET=spv
 
 // NATIVE_HALF: define [[FNATTRS]] half @
-// NATIVE_HALF: call half @llvm.[[TARGET]].step.f16(half
-// NO_HALF: call float @llvm.[[TARGET]].step.f32(float
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].step.f16(half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].step.f32(float
 // NATIVE_HALF: ret half
 // NO_HALF: ret float
 half test_step_half(half p0, half p1)
@@ -27,8 +27,8 @@ half test_step_half(half p0, half p1)
     return step(p0, p1);
 }
 // NATIVE_HALF: define [[FNATTRS]] <2 x half> @
-// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].step.v2f16(<2 x half>
-// NO_HALF: call <2 x float> @llvm.[[TARGET]].step.v2f32(<2 x float>
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].step.v2f16(<2 x half>
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].step.v2f32(<2 x float>
 // NATIVE_HALF: ret <2 x half> %hlsl.step
 // NO_HALF: ret <2 x float> %hlsl.step
 half2 test_step_half2(half2 p0, half2 p1)
@@ -36,8 +36,8 @@ half2 test_step_half2(half2 p0, half2 p1)
     return step(p0, p1);
 }
 // NATIVE_HALF: define [[FNATTRS]] <3 x half> @
-// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].step.v3f16(<3 x half>
-// NO_HALF: call <3 x float> @llvm.[[TARGET]].step.v3f32(<3 x float>
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].step.v3f16(<3 x half>
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].step.v3f32(<3 x float>
 // NATIVE_HALF: ret <3 x half> %hlsl.step
 // NO_HALF: ret <3 x float> %hlsl.step
 half3 test_step_half3(half3 p0, half3 p1)
@@ -45,8 +45,8 @@ half3 test_step_half3(half3 p0, half3 p1)
     return step(p0, p1);
 }
 // NATIVE_HALF: define [[FNATTRS]] <4 x half> @
-// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].step.v4f16(<4 x half>
-// NO_HALF: call <4 x float> @llvm.[[TARGET]].step.v4f32(<4 x float>
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].step.v4f16(<4 x half>
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].step.v4f32(<4 x float>
 // NATIVE_HALF: ret <4 x half> %hlsl.step
 // NO_HALF: ret <4 x float> %hlsl.step
 half4 test_step_half4(half4 p0, half4 p1)
@@ -55,28 +55,28 @@ half4 test_step_half4(half4 p0, half4 p1)
 }
 
 // CHECK: define [[FNATTRS]] float @
-// CHECK: call float @llvm.[[TARGET]].step.f32(float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].step.f32(float
 // CHECK: ret float
 float test_step_float(float p0, float p1)
 {
     return step(p0, p1);
 }
 // CHECK: define [[FNATTRS]] <2 x float> @
-// CHECK: %hlsl.step = call <2 x float> @llvm.[[TARGET]].step.v2f32(
+// CHECK: %hlsl.step = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].step.v2f32(
 // CHECK: ret <2 x float> %hlsl.step
 float2 test_step_float2(float2 p0, float2 p1)
 {
     return step(p0, p1);
 }
 // CHECK: define [[FNATTRS]] <3 x float> @
-// CHECK: %hlsl.step = call <3 x float> @llvm.[[TARGET]].step.v3f32(
+// CHECK: %hlsl.step = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].step.v3f32(
 // CHECK: ret <3 x float> %hlsl.step
 float3 test_step_float3(float3 p0, float3 p1)
 {
     return step(p0, p1);
 }
 // CHECK: define [[FNATTRS]] <4 x float> @
-// CHECK: %hlsl.step = call <4 x float> @llvm.[[TARGET]].step.v4f32(
+// CHECK: %hlsl.step = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].step.v4f32(
 // CHECK: ret <4 x float> %hlsl.step
 float4 test_step_float4(float4 p0, float4 p1)
 {
diff --git a/clang/test/CodeGenHLSL/builtins/tan.hlsl b/clang/test/CodeGenHLSL/builtins/tan.hlsl
index aa542fac226d0..c8c948624a613 100644
--- a/clang/test/CodeGenHLSL/builtins/tan.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/tan.hlsl
@@ -7,53 +7,53 @@
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
 
 // CHECK-LABEL: test_tan_half
-// NATIVE_HALF: call half @llvm.tan.f16
-// NO_HALF: call float @llvm.tan.f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.tan.f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.tan.f32
 half test_tan_half ( half p0 ) {
   return tan ( p0 );
 }
 
 // CHECK-LABEL: test_tan_half2
-// NATIVE_HALF: call <2 x half> @llvm.tan.v2f16
-// NO_HALF: call <2 x float> @llvm.tan.v2f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.tan.v2f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.tan.v2f32
 half2 test_tan_half2 ( half2 p0 ) {
   return tan ( p0 );
 }
 
 // CHECK-LABEL: test_tan_half3
-// NATIVE_HALF: call <3 x half> @llvm.tan.v3f16
-// NO_HALF: call <3 x float> @llvm.tan.v3f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.tan.v3f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.tan.v3f32
 half3 test_tan_half3 ( half3 p0 ) {
   return tan ( p0 );
 }
 
 // CHECK-LABEL: test_tan_half4
-// NATIVE_HALF: call <4 x half> @llvm.tan.v4f16
-// NO_HALF: call <4 x float> @llvm.tan.v4f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.tan.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.tan.v4f32
 half4 test_tan_half4 ( half4 p0 ) {
   return tan ( p0 );
 }
 
 // CHECK-LABEL: test_tan_float
-// CHECK: call float @llvm.tan.f32
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.tan.f32
 float test_tan_float ( float p0 ) {
   return tan ( p0 );
 }
 
 // CHECK-LABEL: test_tan_float2
-// CHECK: call <2 x float> @llvm.tan.v2f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.tan.v2f32
 float2 test_tan_float2 ( float2 p0 ) {
   return tan ( p0 );
 }
 
 // CHECK-LABEL: test_tan_float3
-// CHECK: call <3 x float> @llvm.tan.v3f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.tan.v3f32
 float3 test_tan_float3 ( float3 p0 ) {
   return tan ( p0 );
 }
 
 // CHECK-LABEL: test_tan_float4
-// CHECK: call <4 x float> @llvm.tan.v4f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.tan.v4f32
 float4 test_tan_float4 ( float4 p0 ) {
   return tan ( p0 );
 }
diff --git a/clang/test/CodeGenHLSL/builtins/tanh.hlsl b/clang/test/CodeGenHLSL/builtins/tanh.hlsl
index 6d09c8b819f91..f947c7f53b110 100644
--- a/clang/test/CodeGenHLSL/builtins/tanh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/tanh.hlsl
@@ -7,53 +7,53 @@
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
 
 // CHECK-LABEL: test_tanh_half
-// NATIVE_HALF: call half @llvm.tanh.f16
-// NO_HALF: call float @llvm.tanh.f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.tanh.f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.tanh.f32
 half test_tanh_half ( half p0 ) {
   return tanh ( p0 );
 }
 
 // CHECK-LABEL: test_tanh_half2
-// NATIVE_HALF: call <2 x half> @llvm.tanh.v2f16
-// NO_HALF: call <2 x float> @llvm.tanh.v2f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.tanh.v2f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.tanh.v2f32
 half2 test_tanh_half2 ( half2 p0 ) {
   return tanh ( p0 );
 }
 
 // CHECK-LABEL: test_tanh_half3
-// NATIVE_HALF: call <3 x half> @llvm.tanh.v3f16
-// NO_HALF: call <3 x float> @llvm.tanh.v3f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.tanh.v3f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.tanh.v3f32
 half3 test_tanh_half3 ( half3 p0 ) {
   return tanh ( p0 );
 }
 
 // CHECK-LABEL: test_tanh_half4
-// NATIVE_HALF: call <4 x half> @llvm.tanh.v4f16
-// NO_HALF: call <4 x float> @llvm.tanh.v4f32
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.tanh.v4f16
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.tanh.v4f32
 half4 test_tanh_half4 ( half4 p0 ) {
   return tanh ( p0 );
 }
 
 // CHECK-LABEL: test_tanh_float
-// CHECK: call float @llvm.tanh.f32
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.tanh.f32
 float test_tanh_float ( float p0 ) {
   return tanh ( p0 );
 }
 
 // CHECK-LABEL: test_tanh_float2
-// CHECK: call <2 x float> @llvm.tanh.v2f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.tanh.v2f32
 float2 test_tanh_float2 ( float2 p0 ) {
   return tanh ( p0 );
 }
 
 // CHECK-LABEL: test_tanh_float3
-// CHECK: call <3 x float> @llvm.tanh.v3f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.tanh.v3f32
 float3 test_tanh_float3 ( float3 p0 ) {
   return tanh ( p0 );
 }
 
 // CHECK-LABEL: test_tanh_float4
-// CHECK: call <4 x float> @llvm.tanh.v4f32
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.tanh.v4f32
 float4 test_tanh_float4 ( float4 p0 ) {
   return tanh ( p0 );
 }
diff --git a/clang/test/CodeGenHLSL/builtins/trunc.hlsl b/clang/test/CodeGenHLSL/builtins/trunc.hlsl
index 3da12c88aa7fe..26de5bf94c3cc 100644
--- a/clang/test/CodeGenHLSL/builtins/trunc.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/trunc.hlsl
@@ -5,42 +5,42 @@
 // RUN:  -emit-llvm -disable-llvm-passes -o - | \
 // RUN:  FileCheck %s --check-prefixes=CHECK,NO_HALF
 
-// NATIVE_HALF-LABEL: define noundef half @_Z15test_trunc_half
-// NATIVE_HALF: call half @llvm.trunc.f16(
-// NO_HALF-LABEL: define noundef float @_Z15test_trunc_half
-// NO_HALF: call float @llvm.trunc.f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) half @_Z15test_trunc_half
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.trunc.f16(
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) float @_Z15test_trunc_half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.trunc.f32(
 half test_trunc_half(half p0) { return trunc(p0); }
 
-// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z16test_trunc_half2
-// NATIVE_HALF: call <2 x half> @llvm.trunc.v2f16
-// NO_HALF-LABEL: define noundef <2 x float> @_Z16test_trunc_half2
-// NO_HALF: call <2 x float> @llvm.trunc.v2f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <2 x half> @_Z16test_trunc_half2
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.trunc.v2f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z16test_trunc_half2
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.trunc.v2f32(
 half2 test_trunc_half2(half2 p0) { return trunc(p0); }
 
-// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z16test_trunc_half3
-// NATIVE_HALF: call <3 x half> @llvm.trunc.v3f16
-// NO_HALF-LABEL: define noundef <3 x float> @_Z16test_trunc_half3
-// NO_HALF: call <3 x float> @llvm.trunc.v3f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <3 x half> @_Z16test_trunc_half3
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.trunc.v3f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z16test_trunc_half3
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.trunc.v3f32(
 half3 test_trunc_half3(half3 p0) { return trunc(p0); }
 
-// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z16test_trunc_half4
-// NATIVE_HALF: call <4 x half> @llvm.trunc.v4f16
-// NO_HALF-LABEL: define noundef <4 x float> @_Z16test_trunc_half4
-// NO_HALF: call <4 x float> @llvm.trunc.v4f32(
+// NATIVE_HALF-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z16test_trunc_half4
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.trunc.v4f16
+// NO_HALF-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z16test_trunc_half4
+// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.trunc.v4f32(
 half4 test_trunc_half4(half4 p0) { return trunc(p0); }
 
-// CHECK-LABEL: define noundef float @_Z16test_trunc_float
-// CHECK: call float @llvm.trunc.f32(
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z16test_trunc_float
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.trunc.f32(
 float test_trunc_float(float p0) { return trunc(p0); }
 
-// CHECK-LABEL: define noundef <2 x float> @_Z17test_trunc_float2
-// CHECK: call <2 x float> @llvm.trunc.v2f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <2 x float> @_Z17test_trunc_float2
+// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.trunc.v2f32
 float2 test_trunc_float2(float2 p0) { return trunc(p0); }
 
-// CHECK-LABEL: define noundef <3 x float> @_Z17test_trunc_float3
-// CHECK: call <3 x float> @llvm.trunc.v3f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <3 x float> @_Z17test_trunc_float3
+// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.trunc.v3f32
 float3 test_trunc_float3(float3 p0) { return trunc(p0); }
 
-// CHECK-LABEL: define noundef <4 x float> @_Z17test_trunc_float4
-// CHECK: call <4 x float> @llvm.trunc.v4f32
+// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x float> @_Z17test_trunc_float4
+// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.trunc.v4f32
 float4 test_trunc_float4(float4 p0) { return trunc(p0); }
diff --git a/clang/test/CodeGenHLSL/float3.hlsl b/clang/test/CodeGenHLSL/float3.hlsl
index 767720b049152..4f03464586bf0 100644
--- a/clang/test/CodeGenHLSL/float3.hlsl
+++ b/clang/test/CodeGenHLSL/float3.hlsl
@@ -3,7 +3,7 @@
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
 // Make sure float3 is not changed into float4.
-// CHECK:<3 x float> @_Z3fooDv3_f(<3 x float> noundef %[[PARAM:[0-9a-zA-Z]+]])
+// CHECK:<3 x float> @_Z3fooDv3_f(<3 x float> noundef nofpclass(nan inf) %[[PARAM:[0-9a-zA-Z]+]])
 // CHECK:%[[A_ADDR:.+]] = alloca <3 x float>, align 16
 // CHECK-NEXT:store <3 x float> %[[PARAM]], ptr %[[A_ADDR]], align 16
 // CHECK-NEXT:%[[V:[0-9]+]] = load <3 x float>, ptr %[[A_ADDR]], align 16
diff --git a/clang/test/CodeGenHLSL/this-reference.hlsl b/clang/test/CodeGenHLSL/this-reference.hlsl
index 66b79d4250012..71a33f4a271a2 100644
--- a/clang/test/CodeGenHLSL/this-reference.hlsl
+++ b/clang/test/CodeGenHLSL/this-reference.hlsl
@@ -24,7 +24,7 @@ void main() {
   // CHECK:       %call = call noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %Vals)
   // CHECK-NEXT:  %First = getelementptr inbounds nuw %struct.Pair, ptr %Vals, i32 0, i32 0
   // CHECK-NEXT:  store i32 %call, ptr %First, align 4
-  // CHECK-NEXT:  %call1 = call noundef float @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %Vals)
+  // CHECK-NEXT:  %call1 = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) float @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %Vals)
   // CHECK-NEXT:  %Second = getelementptr inbounds nuw %struct.Pair, ptr %Vals, i32 0, i32 1
 
 // CHECK: [[Pair:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Pair"

From 17912f336bd3d3db09b367c155fb44148ea4de24 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra@codasip.com>
Date: Thu, 9 Jan 2025 16:05:17 +0000
Subject: [PATCH 03/79] LAA: refactor dependence class to prep for scaled
 strides (NFC) (#122113)

Rearrange the DepDistanceAndSizeInfo struct in preparation to scale
strides. getDependenceDistanceStrideAndSize now returns the data of
CommonStride, MaxStride, and clarifies when to retry with runtime
checks, in place of (unscaled) strides.
---
 .../llvm/Analysis/LoopAccessAnalysis.h        | 22 ++++++++---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 37 +++++++++++++------
 2 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index a35bc7402d1a8..31374a128856c 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -366,16 +366,26 @@ class MemoryDepChecker {
 
   struct DepDistanceStrideAndSizeInfo {
     const SCEV *Dist;
-    uint64_t StrideA;
-    uint64_t StrideB;
+
+    /// Strides could either be scaled (in bytes, taking the size of the
+    /// underlying type into account), or unscaled (in indexing units; unscaled
+    /// stride = scaled stride / size of underlying type). Here, strides are
+    /// unscaled.
+    uint64_t MaxStride;
+    std::optional<uint64_t> CommonStride;
+
+    bool ShouldRetryWithRuntimeCheck;
     uint64_t TypeByteSize;
     bool AIsWrite;
     bool BIsWrite;
 
-    DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA,
-                                 uint64_t StrideB, uint64_t TypeByteSize,
-                                 bool AIsWrite, bool BIsWrite)
-        : Dist(Dist), StrideA(StrideA), StrideB(StrideB),
+    DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride,
+                                 std::optional<uint64_t> CommonStride,
+                                 bool ShouldRetryWithRuntimeCheck,
+                                 uint64_t TypeByteSize, bool AIsWrite,
+                                 bool BIsWrite)
+        : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride),
+          ShouldRetryWithRuntimeCheck(ShouldRetryWithRuntimeCheck),
           TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
   };
 
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 2c75d5625cb66..38e9145826c08 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1994,8 +1994,23 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
       DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
   if (!HasSameSize)
     TypeByteSize = 0;
-  return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt),
-                                      std::abs(StrideBPtrInt), TypeByteSize,
+
+  StrideAPtrInt = std::abs(StrideAPtrInt);
+  StrideBPtrInt = std::abs(StrideBPtrInt);
+
+  uint64_t MaxStride = std::max(StrideAPtrInt, StrideBPtrInt);
+
+  std::optional<uint64_t> CommonStride;
+  if (StrideAPtrInt == StrideBPtrInt)
+    CommonStride = StrideAPtrInt;
+
+  // TODO: Historically, we don't retry with runtime checks unless the
+  // (unscaled) strides are the same. Fix this once the condition for runtime
+  // checks in isDependent is fixed.
+  bool ShouldRetryWithRuntimeCheck = CommonStride.has_value();
+
+  return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
+                                      ShouldRetryWithRuntimeCheck, TypeByteSize,
                                       AIsWrite, BIsWrite);
 }
 
@@ -2011,23 +2026,21 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   if (std::holds_alternative<Dependence::DepType>(Res))
     return std::get<Dependence::DepType>(Res);
 
-  auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
+  auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck,
+         TypeByteSize, AIsWrite, BIsWrite] =
       std::get<DepDistanceStrideAndSizeInfo>(Res);
   bool HasSameSize = TypeByteSize > 0;
 
-  std::optional<uint64_t> CommonStride =
-      StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
   if (isa<SCEVCouldNotCompute>(Dist)) {
-    // TODO: Relax requirement that there is a common stride to retry with
-    // non-constant distance dependencies.
-    FoundNonConstantDistanceDependence |= CommonStride.has_value();
+    // TODO: Relax requirement that there is a common unscaled stride to retry
+    // with non-constant distance dependencies.
+    FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
     LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
     return Dependence::Unknown;
   }
 
   ScalarEvolution &SE = *PSE.getSE();
   auto &DL = InnermostLoop->getHeader()->getDataLayout();
-  uint64_t MaxStride = std::max(StrideA, StrideB);
 
   // If the distance between the acecsses is larger than their maximum absolute
   // stride multiplied by the symbolic maximum backedge taken count (which is an
@@ -2086,7 +2099,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
         // condition to consider retrying with runtime checks. Historically, we
         // did not set it when strides were different but there is no inherent
         // reason to.
-        FoundNonConstantDistanceDependence |= CommonStride.has_value();
+        FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
         return Dependence::Unknown;
       }
       if (!HasSameSize ||
@@ -2105,7 +2118,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
   // Below we only handle strictly positive distances.
   if (MinDistance <= 0) {
-    FoundNonConstantDistanceDependence |= CommonStride.has_value();
+    FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
     return Dependence::Unknown;
   }
 
@@ -2118,7 +2131,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
     // condition to consider retrying with runtime checks. Historically, we
     // did not set it when strides were different but there is no inherent
     // reason to.
-    FoundNonConstantDistanceDependence |= CommonStride.has_value();
+    FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
   }
 
   if (!HasSameSize) {

From fb03ce7aadd997486e2709051290756e09ad3d01 Mon Sep 17 00:00:00 2001
From: LoS <kaffedesk@gmail.com>
Date: Thu, 9 Jan 2025 17:22:46 +0100
Subject: [PATCH 04/79] [libc++] Fix test for vector data_const.pass.cpp
 (#122085)

The test contained some non-const accesses when we're really trying to test
that the method is const.
---
 .../sequences/vector/vector.data/data_const.pass.cpp        | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp
index 885caf272afbf..0cf0b22047352 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp
@@ -39,7 +39,7 @@ TEST_CONSTEXPR_CXX20 bool tests()
         assert(is_contiguous_container_asan_correct(v));
     }
     {
-        std::vector<Nasty> v(100);
+        const std::vector<Nasty> v(100);
         assert(v.data() == std::addressof(v.front()));
         assert(is_contiguous_container_asan_correct(v));
     }
@@ -55,7 +55,7 @@ TEST_CONSTEXPR_CXX20 bool tests()
         assert(is_contiguous_container_asan_correct(v));
     }
     {
-        std::vector<Nasty, min_allocator<Nasty>> v(100);
+        const std::vector<Nasty, min_allocator<Nasty>> v(100);
         assert(v.data() == std::addressof(v.front()));
         assert(is_contiguous_container_asan_correct(v));
     }
@@ -70,7 +70,7 @@ TEST_CONSTEXPR_CXX20 bool tests()
       assert(is_contiguous_container_asan_correct(v));
     }
     {
-      std::vector<Nasty, safe_allocator<Nasty>> v(100);
+      const std::vector<Nasty, safe_allocator<Nasty>> v(100);
       assert(v.data() == std::addressof(v.front()));
       assert(is_contiguous_container_asan_correct(v));
     }

From d056c756aea3fc709cf7d6bc8acabe9a8c7218db Mon Sep 17 00:00:00 2001
From: Alexander Belyaev <32522095+pifon2a@users.noreply.github.com>
Date: Thu, 9 Jan 2025 17:31:17 +0100
Subject: [PATCH 05/79] [mlir][scf] Fix unrolling when the yielded value is
 defined above the loop. (#122177)

---
 mlir/lib/Dialect/SCF/Utils/Utils.cpp   |  2 +-
 mlir/test/Dialect/SCF/loop-unroll.mlir | 29 ++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index 6cda7100fe073..fa82bcb816a2a 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -361,7 +361,7 @@ static void generateUnrolledLoop(
 
     // Update yielded values.
     for (unsigned i = 0, e = lastYielded.size(); i < e; i++)
-      lastYielded[i] = operandMap.lookup(yieldedValues[i]);
+      lastYielded[i] = operandMap.lookupOrDefault(yieldedValues[i]);
   }
 
   // Make sure we annotate the Ops in the original body. We do this last so that
diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir
index 68a11fb6a72c6..0368505a1b70d 100644
--- a/mlir/test/Dialect/SCF/loop-unroll.mlir
+++ b/mlir/test/Dialect/SCF/loop-unroll.mlir
@@ -489,3 +489,32 @@ func.func @static_loop_unroll_with_integer_iv() -> (f32, f32) {
 //  UNROLL-BY-3-NEXT:     scf.yield %[[EADD]], %[[EMUL]] : f32, f32
 //  UNROLL-BY-3-NEXT:   }
 //  UNROLL-BY-3-NEXT:   return %[[EFOR]]#0, %[[EFOR]]#1 : f32, f32
+
+// -----
+
+// Test loop unrolling when the yielded value is defined above the loop.
+func.func @loop_unroll_static_yield_value_defined_above(%init: i32) {
+  %c42 = arith.constant 42 : i32
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %103:2 = scf.for %i = %c0 to %c4 step %c1
+      iter_args(%iter1 = %c42, %iter2 = %init) -> (i32, i32) {
+    %0 = arith.andi %iter2, %iter1 : i32
+    scf.yield %0, %init : i32, i32
+  }
+  return
+}
+// UNROLL-OUTER-BY-2-LABEL: @loop_unroll_static_yield_value_defined_above(
+// UNROLL-OUTER-BY-2-SAME:    %[[INIT:.*]]: i32) {
+// UNROLL-OUTER-BY-2-DAG:   %[[C42:.*]] = arith.constant 42 : i32
+// UNROLL-OUTER-BY-2-DAG:   %[[C0:.*]] = arith.constant 0 : index
+// UNROLL-OUTER-BY-2-DAG:   %[[C4:.*]] = arith.constant 4 : index
+// UNROLL-OUTER-BY-2-DAG:   %[[C2:.*]] = arith.constant 2 : index
+// UNROLL-OUTER-BY-2:       scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C2]]
+// UNROLL-OUTER-BY-2-SAME:      iter_args(%[[ITER1:.*]] = %[[C42]],
+// UNROLL-OUTER-BY-2-SAME:                %[[ITER2:.*]] = %[[INIT]])
+// UNROLL-OUTER-BY-2:         %[[SUM:.*]] = arith.andi %[[ITER2]], %[[ITER1]]
+// UNROLL-OUTER-BY-2:         %[[SUM1:.*]] = arith.andi %[[INIT]], %[[SUM]]
+// UNROLL-OUTER-BY-2:         scf.yield %[[SUM1]], %[[INIT]] : i32, i32
+

From 5e6c74d086848d8525b2bdd3b3e717c64b576193 Mon Sep 17 00:00:00 2001
From: Nick Sarnie <nick.sarnie@intel.com>
Date: Fri, 10 Jan 2025 01:37:57 +0900
Subject: [PATCH 06/79] [clang] Fix SPIR-V OpenMP test assembler check
 (#122310)

Testing is failing in HEAD, Integration issue with the SPIR-V OpenMP
change and the SPIR-V change adding the assembler job.

I notice there is a consistency issue in the binding name "SPIRV" vs
"SPIR-V", I will fix that in a separate commit so we unbreak build ASAP.

Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
---
 clang/test/Driver/spirv-openmp-toolchain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Driver/spirv-openmp-toolchain.c b/clang/test/Driver/spirv-openmp-toolchain.c
index 3a94d978c2d70..3d585d78e86c2 100644
--- a/clang/test/Driver/spirv-openmp-toolchain.c
+++ b/clang/test/Driver/spirv-openmp-toolchain.c
@@ -45,7 +45,7 @@
 // CHECK-BINDINGS-TEMPS: "spirv64-intel" - "clang", inputs: ["[[INPUT]]"], output: "[[DEVICE_PP:.+]]"
 // CHECK-BINDINGS-TEMPS: "spirv64-intel" - "clang", inputs: ["[[DEVICE_PP]]", "[[HOST_BC]]"], output: "[[DEVICE_TEMP_BC:.+]]"
 // CHECK-BINDINGS-TEMPS: "spirv64-intel" - "SPIR-V::Translator", inputs: ["[[DEVICE_TEMP_BC]]"], output: "[[DEVICE_ASM:.+]]"
-// CHECK-BINDINGS-TEMPS: "spirv64-intel" - "SPIR-V::Translator", inputs: ["[[DEVICE_ASM]]"], output: "[[DEVICE_SPV:.+]]"
+// CHECK-BINDINGS-TEMPS: "spirv64-intel" - "SPIRV::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "[[DEVICE_SPV:.+]]"
 // CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_SPV]]"], output: "[[DEVICE_IMAGE:.+]]"
 // CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_IMAGE]]"], output: "[[HOST_ASM:.+]]"
 // CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "clang::as", inputs: ["[[HOST_ASM]]"], output: "[[HOST_OBJ:.+]]"

From 473510abf5c6a2f9d74a0c19f0f5d8b483596e05 Mon Sep 17 00:00:00 2001
From: "Sv. Lockal" <AngryLoki@users.noreply.github.com>
Date: Fri, 10 Jan 2025 00:40:36 +0800
Subject: [PATCH 07/79] [libc++] Fix mi-mode in GDB pretty printers (#120951)

GDB/MI requires unique names for each child, otherwise fails with
"Duplicate variable object name". Also wrapped containers printers
were flattened for cleaner visualization in IDEs and CLI.

Fixes #62340
---
 .../libcxx/gdb/gdb_pretty_printer_test.py     | 56 +++++++++++++----
 .../libcxx/gdb/gdb_pretty_printer_test.sh.cpp | 63 +++++++++++++------
 libcxx/utils/gdb/libcxx/printers.py           | 41 +++++++-----
 3 files changed, 112 insertions(+), 48 deletions(-)

diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py
index 07154d001d3a1..254a61a8c633e 100644
--- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py
+++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py
@@ -15,6 +15,7 @@
 """
 
 from __future__ import print_function
+import json
 import re
 import gdb
 import sys
@@ -29,6 +30,7 @@
 # we exit.
 has_run_tests = False
 
+has_execute_mi = tuple(map(int, gdb.VERSION.split("."))) >= (14, 2)
 
 class CheckResult(gdb.Command):
     def __init__(self):
@@ -42,36 +44,43 @@ def invoke(self, arg, from_tty):
 
             # Stack frame is:
             # 0. StopForDebugger
-            # 1. ComparePrettyPrintToChars or ComparePrettyPrintToRegex
+            # 1. CompareListChildrenToChars, ComparePrettyPrintToChars or ComparePrettyPrintToRegex
             # 2. TestCase
             compare_frame = gdb.newest_frame().older()
             testcase_frame = compare_frame.older()
             test_loc = testcase_frame.find_sal()
+            test_loc_str = test_loc.symtab.filename + ":" + str(test_loc.line)
             # Use interactive commands in the correct context to get the pretty
             # printed version
 
-            value_str = self._get_value_string(compare_frame, testcase_frame)
+            frame_name = compare_frame.name()
+            if frame_name.startswith("CompareListChildren"):
+                if has_execute_mi:
+                    value = self._get_children(compare_frame)
+                else:
+                    print("SKIPPED: " + test_loc_str)
+                    return
+            else:
+                value = self._get_value(compare_frame, testcase_frame)
 
-            # Ignore the convenience variable name and newline
-            value = value_str[value_str.find("= ") + 2 : -1]
             gdb.newest_frame().select()
             expectation_val = compare_frame.read_var("expectation")
             check_literal = expectation_val.string(encoding="utf-8")
-            if "PrettyPrintToRegex" in compare_frame.name():
+            if "PrettyPrintToRegex" in frame_name:
                 test_fails = not re.search(check_literal, value)
             else:
                 test_fails = value != check_literal
 
             if test_fails:
                 global test_failures
-                print("FAIL: " + test_loc.symtab.filename + ":" + str(test_loc.line))
+                print("FAIL: " + test_loc_str)
                 print("GDB printed:")
                 print("   " + repr(value))
                 print("Value should match:")
                 print("   " + repr(check_literal))
                 test_failures += 1
             else:
-                print("PASS: " + test_loc.symtab.filename + ":" + str(test_loc.line))
+                print("PASS: " + test_loc_str)
 
         except RuntimeError as e:
             # At this point, lots of different things could be wrong, so don't try to
@@ -81,9 +90,28 @@ def invoke(self, arg, from_tty):
             print(str(e))
             test_failures += 1
 
-    def _get_value_string(self, compare_frame, testcase_frame):
+    def _get_children(self, compare_frame):
+        compare_frame.select()
+        gdb.execute_mi("-var-create", "value", "*", "value")
+        r = gdb.execute_mi("-var-list-children", "--simple-values", "value")
+        gdb.execute_mi("-var-delete", "value")
+        children = r["children"]
+        if r["displayhint"] == "map":
+            r = [
+                {
+                    "key": json.loads(children[2 * i]["value"]),
+                    "value": json.loads(children[2 * i + 1]["value"]),
+                }
+                for i in range(len(children) // 2)
+            ]
+        else:
+            r = [json.loads(el["value"]) for el in children]
+        return json.dumps(r, sort_keys=True)
+
+    def _get_value(self, compare_frame, testcase_frame):
         compare_frame.select()
-        if "ComparePrettyPrint" in compare_frame.name():
+        frame_name = compare_frame.name()
+        if frame_name.startswith("ComparePrettyPrint"):
             s = gdb.execute("p value", to_string=True)
         else:
             value_str = str(compare_frame.read_var("value"))
@@ -91,8 +119,10 @@ def _get_value_string(self, compare_frame, testcase_frame):
             testcase_frame.select()
             s = gdb.execute("p " + clean_expression_str, to_string=True)
         if sys.version_info.major == 2:
-            return s.decode("utf-8")
-        return s
+            s = s.decode("utf-8")
+
+        # Ignore the convenience variable name and newline
+        return s[s.find("= ") + 2 : -1]
 
 
 def exit_handler(event=None):
@@ -112,6 +142,10 @@ def exit_handler(event=None):
 # Disable terminal paging
 gdb.execute("set height 0")
 gdb.execute("set python print-stack full")
+
+if has_execute_mi:
+    gdb.execute_mi("-enable-pretty-printing")
+
 test_failures = 0
 CheckResult()
 test_bp = gdb.Breakpoint("StopForDebugger")
diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp
index 9d4b7039402a4..ff951d94db0a4 100644
--- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp
+++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp
@@ -129,6 +129,12 @@ void CompareExpressionPrettyPrintToRegex(
   StopForDebugger(&value, &expectation);
 }
 
+template <typename TypeToPrint>
+void CompareListChildrenToChars(TypeToPrint value, const char* expectation) {
+  MarkAsLive(value);
+  StopForDebugger(&value, &expectation);
+}
+
 namespace example {
   struct example_struct {
     int a = 0;
@@ -361,28 +367,28 @@ void multimap_test() {
 
 void queue_test() {
   std::queue<int> i_am_empty;
-  ComparePrettyPrintToChars(i_am_empty,
-      "std::queue wrapping = {std::deque is empty}");
+  ComparePrettyPrintToChars(i_am_empty, "std::queue wrapping: std::deque is empty");
 
   std::queue<int> one_two_three(std::deque<int>{1, 2, 3});
-    ComparePrettyPrintToChars(one_two_three,
-        "std::queue wrapping = {"
-        "std::deque with 3 elements = {1, 2, 3}}");
+  ComparePrettyPrintToChars(
+      one_two_three,
+      "std::queue wrapping: "
+      "std::deque with 3 elements = {1, 2, 3}");
 }
 
 void priority_queue_test() {
   std::priority_queue<int> i_am_empty;
-  ComparePrettyPrintToChars(i_am_empty,
-      "std::priority_queue wrapping = {std::vector of length 0, capacity 0}");
+  ComparePrettyPrintToChars(i_am_empty, "std::priority_queue wrapping: std::vector of length 0, capacity 0");
 
   std::priority_queue<int> one_two_three;
   one_two_three.push(11111);
   one_two_three.push(22222);
   one_two_three.push(33333);
 
-  ComparePrettyPrintToRegex(one_two_three,
-      R"(std::priority_queue wrapping = )"
-      R"({std::vector of length 3, capacity 3 = {33333)");
+  ComparePrettyPrintToRegex(
+      one_two_three,
+      R"(std::priority_queue wrapping: )"
+      R"(std::vector of length 3, capacity 3 = {33333)");
 
   ComparePrettyPrintToRegex(one_two_three, ".*11111.*");
   ComparePrettyPrintToRegex(one_two_three, ".*22222.*");
@@ -410,25 +416,22 @@ void set_test() {
 
 void stack_test() {
   std::stack<int> test0;
-  ComparePrettyPrintToChars(test0,
-                            "std::stack wrapping = {std::deque is empty}");
+  ComparePrettyPrintToChars(test0, "std::stack wrapping: std::deque is empty");
   test0.push(5);
   test0.push(6);
-  ComparePrettyPrintToChars(
-      test0, "std::stack wrapping = {std::deque with 2 elements = {5, 6}}");
+  ComparePrettyPrintToChars(test0, "std::stack wrapping: std::deque with 2 elements = {5, 6}");
   std::stack<bool> test1;
   test1.push(true);
   test1.push(false);
-  ComparePrettyPrintToChars(
-      test1,
-      "std::stack wrapping = {std::deque with 2 elements = {true, false}}");
+  ComparePrettyPrintToChars(test1, "std::stack wrapping: std::deque with 2 elements = {true, false}");
 
   std::stack<std::string> test2;
   test2.push("Hello");
   test2.push("World");
-  ComparePrettyPrintToChars(test2,
-                            "std::stack wrapping = {std::deque with 2 elements "
-                            "= {\"Hello\", \"World\"}}");
+  ComparePrettyPrintToChars(
+      test2,
+      "std::stack wrapping: std::deque with 2 elements "
+      "= {\"Hello\", \"World\"}");
 }
 
 void multiset_test() {
@@ -662,6 +665,25 @@ void streampos_test() {
   ComparePrettyPrintToRegex(test1, "^std::fpos with stream offset:5( with state: {count:0 value:0})?$");
 }
 
+void mi_mode_test() {
+  std::map<int, std::string> one_two_three_map;
+  one_two_three_map.insert({1, "one"});
+  one_two_three_map.insert({2, "two"});
+  one_two_three_map.insert({3, "three"});
+  CompareListChildrenToChars(
+      one_two_three_map, R"([{"key": 1, "value": "one"}, {"key": 2, "value": "two"}, {"key": 3, "value": "three"}])");
+
+  std::unordered_map<int, std::string> one_two_three_umap;
+  one_two_three_umap.insert({3, "three"});
+  one_two_three_umap.insert({2, "two"});
+  one_two_three_umap.insert({1, "one"});
+  CompareListChildrenToChars(
+      one_two_three_umap, R"([{"key": 3, "value": "three"}, {"key": 2, "value": "two"}, {"key": 1, "value": "one"}])");
+
+  std::deque<int> one_two_three_deque{1, 2, 3};
+  CompareListChildrenToChars(one_two_three_deque, "[1, 2, 3]");
+}
+
 int main(int, char**) {
   framework_self_test();
 
@@ -694,5 +716,6 @@ int main(int, char**) {
   unordered_set_iterator_test();
   pointer_negative_test();
   streampos_test();
+  mi_mode_test();
   return 0;
 }
diff --git a/libcxx/utils/gdb/libcxx/printers.py b/libcxx/utils/gdb/libcxx/printers.py
index 8e74b93e7b121..31c27a1959cb2 100644
--- a/libcxx/utils/gdb/libcxx/printers.py
+++ b/libcxx/utils/gdb/libcxx/printers.py
@@ -446,10 +446,13 @@ def _list_it(self):
         num_emitted = 0
         current_addr = self.start_ptr
         start_index = self.first_block_start_index
+        i = 0
         while num_emitted < self.size:
             end_index = min(start_index + self.size - num_emitted, self.block_size)
             for _, elem in self._bucket_it(current_addr, start_index, end_index):
-                yield "", elem
+                key_name = "[%d]" % i
+                i += 1
+                yield key_name, elem
             num_emitted += end_index - start_index
             current_addr = gdb.Value(addr_as_long(current_addr) + _pointer_size).cast(
                 self.node_type
@@ -494,8 +497,8 @@ def to_string(self):
 
     def _list_iter(self):
         current_node = self.first_node
-        for _ in range(self.size):
-            yield "", current_node.cast(self.nodetype).dereference()["__value_"]
+        for i in range(self.size):
+            yield "[%d]" % i, current_node.cast(self.nodetype).dereference()["__value_"]
             current_node = current_node.dereference()["__next_"]
 
     def __iter__(self):
@@ -512,15 +515,14 @@ class StdQueueOrStackPrinter(object):
     """Print a std::queue or std::stack."""
 
     def __init__(self, val):
-        self.val = val
-        self.underlying = val["c"]
+        self.typename = _remove_generics(_prettify_typename(val.type))
+        self.visualizer = gdb.default_visualizer(val["c"])
 
     def to_string(self):
-        typename = _remove_generics(_prettify_typename(self.val.type))
-        return "%s wrapping" % typename
+        return "%s wrapping: %s" % (self.typename, self.visualizer.to_string())
 
     def children(self):
-        return iter([("", self.underlying)])
+        return self.visualizer.children()
 
     def display_hint(self):
         return "array"
@@ -530,19 +532,18 @@ class StdPriorityQueuePrinter(object):
     """Print a std::priority_queue."""
 
     def __init__(self, val):
-        self.val = val
-        self.underlying = val["c"]
+        self.typename = _remove_generics(_prettify_typename(val.type))
+        self.visualizer = gdb.default_visualizer(val["c"])
 
     def to_string(self):
         # TODO(tamur): It would be nice to print the top element. The technical
         # difficulty is that, the implementation refers to the underlying
         # container, which is a generic class. libstdcxx pretty printers do not
         # print the top element.
-        typename = _remove_generics(_prettify_typename(self.val.type))
-        return "%s wrapping" % typename
+        return "%s wrapping: %s" % (self.typename, self.visualizer.to_string())
 
     def children(self):
-        return iter([("", self.underlying)])
+        return self.visualizer.children()
 
     def display_hint(self):
         return "array"
@@ -622,13 +623,16 @@ def _traverse(self):
         """Traverses the binary search tree in order."""
         current = self.util.root
         skip_left_child = False
+        i = 0
         while True:
             if not skip_left_child and self.util.left_child(current):
                 current = self.util.left_child(current)
                 continue
             skip_left_child = False
             for key_value in self._get_key_value(current):
-                yield "", key_value
+                key_name = "[%d]" % i
+                i += 1
+                yield key_name, key_value
             right_child = self.util.right_child(current)
             if right_child:
                 current = right_child
@@ -784,10 +788,13 @@ def __init__(self, val):
 
     def _list_it(self, sentinel_ptr):
         next_ptr = sentinel_ptr["__next_"]
+        i = 0
         while str(next_ptr.cast(_void_pointer_type)) != "0x0":
             next_val = next_ptr.cast(self.cast_type).dereference()
             for key_value in self._get_key_value(next_val):
-                yield "", key_value
+                key_name = "[%d]" % i
+                i += 1
+                yield key_name, key_value
             next_ptr = next_val["__next_"]
 
     def to_string(self):
@@ -851,8 +858,8 @@ def children(self):
         return self if self.addr else iter(())
 
     def __iter__(self):
-        for key_value in self._get_key_value():
-            yield "", key_value
+        for i, key_value in enumerate(self._get_key_value()):
+            yield "[%d]" % i, key_value
 
 
 class StdUnorderedSetIteratorPrinter(AbstractHashMapIteratorPrinter):

From 7724be972858477d9d4552f5fa2edb5222bff9e0 Mon Sep 17 00:00:00 2001
From: Andrea Faulds <andrea.faulds@amd.com>
Date: Thu, 9 Jan 2025 17:58:51 +0100
Subject: [PATCH 08/79] [mlir][spirv] Do SPIR-V serialization in
 -test-vulkan-runner-pipeline (#121494)

This commit is a further incremental step toward moving the whole
mlir-vulkan-runner MLIR pass pipeline into mlir-opt (see #73457). The
previous step was b225b3adf7b78387c9fcb97a3ff0e0a1e26eafe2, which moved
all device passes prior to SPIR-V serialization into a new mlir-opt test
pass, `-test-vulkan-runner-pipeline`.

This commit changes how SPIR-V serialization is accomplished for Vulkan
runner tests. Until now, this was done by the Vulkan-specific
ConvertGpuLaunchFuncToVulkanLaunchFunc pass. With this commit, this
responsibility is removed from that pass, and is instead done with the
existing generic GpuModuleToBinaryPass. In addition, the SPIR-V
serialization step is no longer done inside mlir-vulkan-runner, but
rather inside mlir-opt (in the `-test-vulkan-runner-pipeline` pass).
Both of these changes represent a greater alignment between
mlir-vulkan-runner and the other GPU integration tests. Notably, the IR
shapes produced by the mlir-opt pipelines for the Vulkan and SYCL
runners are now much more similar, with both using a gpu.binary op for
the serialized SPIR-V kernel.

In order to enable this, this commit includes these supporting changes:

- ConvertToSPIRVPass is enhanced to support producing the IR shape where
a spirv.module is nested inside a gpu.module, since this is what
GpuModuleToBinaryPass expects.
- ConvertGPULaunchFuncToVulkanLaunchFunc is changed to remove its SPIR-V
serialization functionality, and instead now extracts the SPIR-V from a
gpu.binary operation (as produced by ConvertToSPIRVPass).
- `-test-vulkan-runner-pipeline` now attaches SPIR-V target information
required by GpuModuleToBinaryPass.
- The WebGPU pass option, which had been removed from mlir-vulkan-runner
in the previous commit in this series, is restored as an option to
`-test-vulkan-runner-pipeline` instead, so that the WebGPU pass
continues being inserted into the pipeline just before SPIR-V
serialization.
---
 mlir/include/mlir/Conversion/Passes.td        |  5 +-
 .../ConvertToSPIRV/ConvertToSPIRVPass.cpp     |  5 +-
 .../Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp  |  2 +-
 ...ConvertGPULaunchFuncToVulkanLaunchFunc.cpp | 64 +++++++++++--------
 .../convert-gpu-modules-nested.mlir           | 30 +++++++++
 .../lower-gpu-launch-vulkan-launch.mlir       | 28 ++++----
 .../lib/Pass/TestVulkanRunnerPipeline.cpp     | 34 ++++++++--
 .../mlir-vulkan-runner/addui_extended.mlir    |  2 +-
 .../mlir-vulkan-runner/smul_extended.mlir     |  2 +-
 .../mlir-vulkan-runner/umul_extended.mlir     |  2 +-
 10 files changed, 121 insertions(+), 53 deletions(-)
 create mode 100644 mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules-nested.mlir

diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 58ee87cf82039..afeed370ce347 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -61,7 +61,10 @@ def ConvertToSPIRVPass : Pass<"convert-to-spirv"> {
     "Run vector unrolling to convert vector types in function bodies">,
     Option<"convertGPUModules", "convert-gpu-modules", "bool",
     /*default=*/"false",
-    "Clone and convert GPU modules">
+    "Clone and convert GPU modules">,
+    Option<"nestInGPUModule", "nest-in-gpu-module", "bool",
+    /*default=*/"false",
+    "Put converted SPIR-V module inside the gpu.module instead of alongside it.">,
   ];
 }
 
diff --git a/mlir/lib/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.cpp b/mlir/lib/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.cpp
index 4b7f7ff114dee..ab9c048f56106 100644
--- a/mlir/lib/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.cpp
+++ b/mlir/lib/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.cpp
@@ -108,7 +108,10 @@ struct ConvertToSPIRVPass final
     SmallVector<Operation *, 1> gpuModules;
     OpBuilder builder(context);
     op->walk([&](gpu::GPUModuleOp gpuModule) {
-      builder.setInsertionPoint(gpuModule);
+      if (nestInGPUModule)
+        builder.setInsertionPointToStart(gpuModule.getBody());
+      else
+        builder.setInsertionPoint(gpuModule);
       gpuModules.push_back(builder.clone(*gpuModule));
     });
     // Run conversion for each module independently as they can have
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp
index 08b451f7d5b32..509b6343057b9 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp
@@ -71,7 +71,7 @@ void GPUToSPIRVPass::runOnOperation() {
     // launch op still needs the original GPU kernel module.
     // For Vulkan Shader capabilities, we insert the newly converted SPIR-V
     // module right after the original GPU module, as that's the expectation of
-    // the in-tree Vulkan runner.
+    // the in-tree SPIR-V CPU runner (the Vulkan runner does not use this pass).
     // For OpenCL Kernel capabilities, we insert the newly converted SPIR-V
     // module inside the original GPU module, as that's the expectaion of the
     // normal GPU compilation pipeline.
diff --git a/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
index 2d2251672230b..8488fac69e8e3 100644
--- a/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
+++ b/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
@@ -7,9 +7,8 @@
 //===----------------------------------------------------------------------===//
 //
 // This file implements a pass to convert gpu launch function into a vulkan
-// launch function. Creates a SPIR-V binary shader from the `spirv::ModuleOp`
-// using `spirv::serialize` function, attaches binary data and entry point name
-// as an attributes to vulkan launch call op.
+// launch function. Extracts the SPIR-V from a `gpu::BinaryOp` and attaches it
+// along with the entry point name as attributes to a Vulkan launch call op.
 //
 //===----------------------------------------------------------------------===//
 
@@ -40,10 +39,9 @@ static constexpr const char *kVulkanLaunch = "vulkanLaunch";
 
 namespace {
 
-/// A pass to convert gpu launch op to vulkan launch call op, by creating a
-/// SPIR-V binary shader from `spirv::ModuleOp` using `spirv::serialize`
-/// function and attaching binary data and entry point name as an attributes to
-/// created vulkan launch call op.
+/// A pass to convert gpu launch op to vulkan launch call op, by extracting a
+/// SPIR-V binary shader from a `gpu::BinaryOp` and attaching binary data and
+/// entry point name as an attributes to created vulkan launch call op.
 class ConvertGpuLaunchFuncToVulkanLaunchFunc
     : public impl::ConvertGpuLaunchFuncToVulkanLaunchFuncBase<
           ConvertGpuLaunchFuncToVulkanLaunchFunc> {
@@ -51,10 +49,9 @@ class ConvertGpuLaunchFuncToVulkanLaunchFunc
   void runOnOperation() override;
 
 private:
-  /// Creates a SPIR-V binary shader from the given `module` using
-  /// `spirv::serialize` function.
-  LogicalResult createBinaryShader(ModuleOp module,
-                                   std::vector<char> &binaryShader);
+  /// Extracts a SPIR-V binary shader from the given `module`, if any.
+  /// Note that this also removes the binary from the IR.
+  FailureOr<StringAttr> getBinaryShader(ModuleOp module);
 
   /// Converts the given `launchOp` to vulkan launch call.
   void convertGpuLaunchFunc(gpu::LaunchFuncOp launchOp);
@@ -135,22 +132,35 @@ LogicalResult ConvertGpuLaunchFuncToVulkanLaunchFunc::declareVulkanLaunchFunc(
   return success();
 }
 
-LogicalResult ConvertGpuLaunchFuncToVulkanLaunchFunc::createBinaryShader(
-    ModuleOp module, std::vector<char> &binaryShader) {
+FailureOr<StringAttr>
+ConvertGpuLaunchFuncToVulkanLaunchFunc::getBinaryShader(ModuleOp module) {
   bool done = false;
-  SmallVector<uint32_t, 0> binary;
-  for (auto spirvModule : module.getOps<spirv::ModuleOp>()) {
+  StringAttr binaryAttr;
+  gpu::BinaryOp binaryToErase;
+  for (auto gpuBinary : module.getOps<gpu::BinaryOp>()) {
     if (done)
-      return spirvModule.emitError("should only contain one 'spirv.module' op");
+      return gpuBinary.emitError("should only contain one 'gpu.binary' op");
     done = true;
 
-    if (failed(spirv::serialize(spirvModule, binary)))
-      return failure();
+    ArrayRef<Attribute> objects = gpuBinary.getObjectsAttr().getValue();
+    if (objects.size() != 1)
+      return gpuBinary.emitError("should only contain a single object");
+
+    auto object = cast<gpu::ObjectAttr>(objects[0]);
+
+    if (!isa<spirv::TargetEnvAttr>(object.getTarget()))
+      return gpuBinary.emitError(
+          "should contain an object with a SPIR-V target environment");
+
+    binaryAttr = object.getObject();
+    binaryToErase = gpuBinary;
   }
-  binaryShader.resize(binary.size() * sizeof(uint32_t));
-  std::memcpy(binaryShader.data(), reinterpret_cast<char *>(binary.data()),
-              binaryShader.size());
-  return success();
+  if (!done)
+    return module.emitError("should contain a 'gpu.binary' op");
+
+  // Remove the binary to avoid confusing later conversion passes.
+  binaryToErase.erase();
+  return binaryAttr;
 }
 
 void ConvertGpuLaunchFuncToVulkanLaunchFunc::convertGpuLaunchFunc(
@@ -159,9 +169,9 @@ void ConvertGpuLaunchFuncToVulkanLaunchFunc::convertGpuLaunchFunc(
   OpBuilder builder(launchOp);
   Location loc = launchOp.getLoc();
 
-  // Serialize `spirv::Module` into binary form.
-  std::vector<char> binary;
-  if (failed(createBinaryShader(module, binary)))
+  FailureOr<StringAttr> binaryAttr = getBinaryShader(module);
+  // Extract SPIR-V from `gpu.binary` op.
+  if (failed(binaryAttr))
     return signalPassFailure();
 
   // Declare vulkan launch function.
@@ -182,9 +192,7 @@ void ConvertGpuLaunchFuncToVulkanLaunchFunc::convertGpuLaunchFunc(
       vulkanLaunchOperands);
 
   // Set SPIR-V binary shader data as an attribute.
-  vulkanLaunchCallOp->setAttr(
-      kSPIRVBlobAttrName,
-      builder.getStringAttr(StringRef(binary.data(), binary.size())));
+  vulkanLaunchCallOp->setAttr(kSPIRVBlobAttrName, *binaryAttr);
 
   // Set entry point name as an attribute.
   vulkanLaunchCallOp->setAttr(kSPIRVEntryPointAttrName,
diff --git a/mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules-nested.mlir b/mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules-nested.mlir
new file mode 100644
index 0000000000000..7562de17c606e
--- /dev/null
+++ b/mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules-nested.mlir
@@ -0,0 +1,30 @@
+// RUN: mlir-opt -convert-to-spirv="convert-gpu-modules=true nest-in-gpu-module=true run-signature-conversion=false run-vector-unrolling=false" %s | FileCheck %s
+
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Shader], []>, #spirv.resource_limits<>>
+} {
+  // CHECK-LABEL: func.func @main
+  // CHECK:       %[[C1:.*]] = arith.constant 1 : index
+  // CHECK:       gpu.launch_func  @[[$KERNELS_1:.*]]::@[[$BUILTIN_WG_ID_X:.*]] blocks in (%[[C1]], %[[C1]], %[[C1]]) threads in (%[[C1]], %[[C1]], %[[C1]])
+  func.func @main() {
+    %c1 = arith.constant 1 : index
+    gpu.launch_func @kernels_1::@builtin_workgroup_id_x
+        blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1)
+    return
+  }
+
+  // CHECK: gpu.module @[[$KERNELS_1]]
+  // CHECK:   spirv.module @{{.*}} Logical GLSL450
+  // CHECK:   spirv.func @[[$BUILTIN_WG_ID_X]]
+  // CHECK:   spirv.mlir.addressof
+  // CHECK:   spirv.Load "Input"
+  // CHECK:   spirv.CompositeExtract
+  gpu.module @kernels_1 {
+    gpu.func @builtin_workgroup_id_x() kernel
+      attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} {
+      %0 = gpu.block_id x
+      gpu.return
+    }
+  }
+}
diff --git a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
index 665d0a33abedc..96ee1866517e6 100644
--- a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
+++ b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
@@ -1,24 +1,24 @@
-// RUN: mlir-opt %s -convert-gpu-launch-to-vulkan-launch | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Shader exts=SPV_KHR_storage_buffer_storage_class},gpu-module-to-binary,convert-gpu-launch-to-vulkan-launch)' | FileCheck %s
 
 // CHECK: %[[resource:.*]] = memref.alloc() : memref<12xf32>
 // CHECK: %[[index:.*]] = arith.constant 1 : index
 // CHECK: call @vulkanLaunch(%[[index]], %[[index]], %[[index]], %[[resource]]) {spirv_blob = "{{.*}}", spirv_element_types = [f32], spirv_entry_point = "kernel"}
 
 module attributes {gpu.container_module} {
-  spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
-    spirv.GlobalVariable @kernel_arg_0 bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>
-    spirv.func @kernel() "None" attributes {workgroup_attributions = 0 : i64} {
-      %0 = spirv.mlir.addressof @kernel_arg_0 : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>
-      %2 = spirv.Constant 0 : i32
-      %3 = spirv.mlir.addressof @kernel_arg_0 : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>
-      %4 = spirv.AccessChain %0[%2, %2] : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>, i32, i32 -> !spirv.ptr<f32, StorageBuffer>
-      %5 = spirv.Load "StorageBuffer" %4 : f32
-      spirv.Return
-    }
-    spirv.EntryPoint "GLCompute" @kernel
-    spirv.ExecutionMode @kernel "LocalSize", 1, 1, 1
-  }
   gpu.module @kernels {
+    spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]> {
+      spirv.GlobalVariable @kernel_arg_0 bind(0, 0) : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>
+      spirv.func @kernel() "None" attributes {workgroup_attributions = 0 : i64} {
+        %0 = spirv.mlir.addressof @kernel_arg_0 : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>
+        %2 = spirv.Constant 0 : i32
+        %3 = spirv.mlir.addressof @kernel_arg_0 : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>
+        %4 = spirv.AccessChain %0[%2, %2] : !spirv.ptr<!spirv.struct<(!spirv.array<12 x f32, stride=4> [0])>, StorageBuffer>, i32, i32 -> !spirv.ptr<f32, StorageBuffer>
+        %5 = spirv.Load "StorageBuffer" %4 : f32
+        spirv.Return
+      }
+      spirv.EntryPoint "GLCompute" @kernel
+      spirv.ExecutionMode @kernel "LocalSize", 1, 1, 1
+    }
     gpu.func @kernel(%arg0: memref<12xf32>) kernel {
       gpu.return
     }
diff --git a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
index eda9aa9f9efef..789c4d76cee0d 100644
--- a/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
+++ b/mlir/test/lib/Pass/TestVulkanRunnerPipeline.cpp
@@ -12,33 +12,57 @@
 
 #include "mlir/Conversion/ConvertToSPIRV/ConvertToSPIRVPass.h"
 #include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
 #include "mlir/Dialect/SPIRV/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"
+#include "mlir/Pass/PassOptions.h"
 
 using namespace mlir;
 
 namespace {
 
-void buildTestVulkanRunnerPipeline(OpPassManager &passManager) {
+struct VulkanRunnerPipelineOptions
+    : PassPipelineOptions<VulkanRunnerPipelineOptions> {
+  Option<bool> spirvWebGPUPrepare{
+      *this, "spirv-webgpu-prepare",
+      llvm::cl::desc("Run MLIR transforms used when targetting WebGPU")};
+};
+
+void buildTestVulkanRunnerPipeline(OpPassManager &passManager,
+                                   const VulkanRunnerPipelineOptions &options) {
   passManager.addPass(createGpuKernelOutliningPass());
   passManager.addPass(memref::createFoldMemRefAliasOpsPass());
 
+  GpuSPIRVAttachTargetOptions attachTargetOptions{};
+  attachTargetOptions.spirvVersion = "v1.0";
+  attachTargetOptions.spirvCapabilities.push_back("Shader");
+  attachTargetOptions.spirvExtensions.push_back(
+      "SPV_KHR_storage_buffer_storage_class");
+  passManager.addPass(createGpuSPIRVAttachTarget(attachTargetOptions));
+
   ConvertToSPIRVPassOptions convertToSPIRVOptions{};
   convertToSPIRVOptions.convertGPUModules = true;
+  convertToSPIRVOptions.nestInGPUModule = true;
   passManager.addPass(createConvertToSPIRVPass(convertToSPIRVOptions));
-  OpPassManager &modulePM = passManager.nest<spirv::ModuleOp>();
-  modulePM.addPass(spirv::createSPIRVLowerABIAttributesPass());
-  modulePM.addPass(spirv::createSPIRVUpdateVCEPass());
+
+  OpPassManager &spirvModulePM =
+      passManager.nest<gpu::GPUModuleOp>().nest<spirv::ModuleOp>();
+  spirvModulePM.addPass(spirv::createSPIRVLowerABIAttributesPass());
+  spirvModulePM.addPass(spirv::createSPIRVUpdateVCEPass());
+  if (options.spirvWebGPUPrepare)
+    spirvModulePM.addPass(spirv::createSPIRVWebGPUPreparePass());
+
+  passManager.addPass(createGpuModuleToBinaryPass());
 }
 
 } // namespace
 
 namespace mlir::test {
 void registerTestVulkanRunnerPipeline() {
-  PassPipelineRegistration<>(
+  PassPipelineRegistration<VulkanRunnerPipelineOptions>(
       "test-vulkan-runner-pipeline",
       "Runs a series of passes for lowering GPU-dialect MLIR to "
       "SPIR-V-dialect MLIR intended for mlir-vulkan-runner.",
diff --git a/mlir/test/mlir-vulkan-runner/addui_extended.mlir b/mlir/test/mlir-vulkan-runner/addui_extended.mlir
index 158541f326be7..b8db451421459 100644
--- a/mlir/test/mlir-vulkan-runner/addui_extended.mlir
+++ b/mlir/test/mlir-vulkan-runner/addui_extended.mlir
@@ -6,7 +6,7 @@
 // RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline -spirv-webgpu-prepare \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \
 // RUN:   | mlir-vulkan-runner - \
 // RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
diff --git a/mlir/test/mlir-vulkan-runner/smul_extended.mlir b/mlir/test/mlir-vulkan-runner/smul_extended.mlir
index 2dd31d2ebb9a0..334aec843e197 100644
--- a/mlir/test/mlir-vulkan-runner/smul_extended.mlir
+++ b/mlir/test/mlir-vulkan-runner/smul_extended.mlir
@@ -6,7 +6,7 @@
 // RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline -spirv-webgpu-prepare \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \
 // RUN:   | mlir-vulkan-runner - \
 // RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
diff --git a/mlir/test/mlir-vulkan-runner/umul_extended.mlir b/mlir/test/mlir-vulkan-runner/umul_extended.mlir
index 78300d2fd81dd..803b8c3d336d3 100644
--- a/mlir/test/mlir-vulkan-runner/umul_extended.mlir
+++ b/mlir/test/mlir-vulkan-runner/umul_extended.mlir
@@ -6,7 +6,7 @@
 // RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s
 
-// RUN: mlir-opt %s -test-vulkan-runner-pipeline -spirv-webgpu-prepare \
+// RUN: mlir-opt %s -test-vulkan-runner-pipeline=spirv-webgpu-prepare \
 // RUN:   | mlir-vulkan-runner - \
 // RUN:     --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
 // RUN:     --entry-point-result=void | FileCheck %s

From 89499c666819d88c8abe85b58005fc8e5eb5f03f Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka@google.com>
Date: Thu, 9 Jan 2025 08:59:05 -0800
Subject: [PATCH 09/79] [BoundsChecking][test] Reorder RUN lines (#122229)

To match output order of update_test_checks.py.
---
 .../BoundsChecking/runtimes.ll                | 28 ++++++++++++++++---
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll
index 7b95a4092af77..d61627287a31a 100644
--- a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll
+++ b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll
@@ -1,4 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes='bounds-checking<trap;merge>'   -S | FileCheck %s --check-prefixes=TR
+; RUN: opt < %s -passes='bounds-checking<rt;merge>'     -S | FileCheck %s --check-prefixes=RT
+
 ; RUN: opt < %s -passes=bounds-checking                 -S | FileCheck %s --check-prefixes=TR-NOMERGE
 ; RUN: opt < %s -passes='bounds-checking<trap>'         -S | FileCheck %s --check-prefixes=TR-NOMERGE
 ; RUN: opt < %s -passes='bounds-checking<rt>'           -S | FileCheck %s --check-prefixes=RT-NOMERGE
@@ -129,11 +132,28 @@ define void @f1(i64 %x) nounwind {
   ret void
 }
 
+;.
+; TR: attributes #[[ATTR0]] = { nounwind }
+; TR: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
+; TR: attributes #[[ATTR2]] = { noreturn nounwind }
+;.
+; RT: attributes #[[ATTR0]] = { nounwind }
+;.
+; TR-NOMERGE: attributes #[[ATTR0]] = { nounwind }
+; TR-NOMERGE: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind }
 ; TR-NOMERGE: attributes #[[ATTR2]] = { nomerge noreturn nounwind }
+;.
+; RT-NOMERGE: attributes #[[ATTR0]] = { nounwind }
 ; RT-NOMERGE: attributes #[[ATTR1]] = { nomerge nounwind }
+;.
+; RTABORT-NOMERGE: attributes #[[ATTR0]] = { nounwind }
+; RTABORT-NOMERGE: attributes #[[ATTR1:[0-9]+]] = { noreturn nounwind }
 ; RTABORT-NOMERGE: attributes #[[ATTR2]] = { nomerge noreturn nounwind }
+;.
+; MINRT-NOMERGE: attributes #[[ATTR0]] = { nounwind }
 ; MINRT-NOMERGE: attributes #[[ATTR1]] = { nomerge nounwind }
+;.
+; MINRTABORT-NOMERGE: attributes #[[ATTR0]] = { nounwind }
+; MINRTABORT-NOMERGE: attributes #[[ATTR1:[0-9]+]] = { noreturn nounwind }
 ; MINRTABORT-NOMERGE: attributes #[[ATTR2]] = { nomerge noreturn nounwind }
-;
-; TR: attributes #[[ATTR2]] = { noreturn nounwind }
-; RT: attributes #[[ATTR0]] = { nounwind }
+;.

From a7cbd41000f55c0475bbfb42a2bb7af418c47a35 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka@google.com>
Date: Thu, 9 Jan 2025 09:00:50 -0800
Subject: [PATCH 10/79] [BoundsChecking][test] Remove duplicate RUNS

Fixing failed conflict resolution in #122229.
---
 llvm/test/Instrumentation/BoundsChecking/runtimes.ll | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll
index d61627287a31a..6695e0fa549fa 100644
--- a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll
+++ b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll
@@ -9,9 +9,6 @@
 ; RUN: opt < %s -passes='bounds-checking<min-rt>'       -S | FileCheck %s --check-prefixes=MINRT-NOMERGE
 ; RUN: opt < %s -passes='bounds-checking<min-rt-abort>' -S | FileCheck %s --check-prefixes=MINRTABORT-NOMERGE
 ;
-; RUN: opt < %s -passes='bounds-checking<trap;merge>'   -S | FileCheck %s --check-prefixes=TR
-; RUN: opt < %s -passes='bounds-checking<rt;merge>'     -S | FileCheck %s --check-prefixes=RT
-
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 define void @f1(i64 %x) nounwind {

From e2449f1bceeefd4a603cae024a7a1db763df6834 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 9 Jan 2025 09:09:55 -0800
Subject: [PATCH 11/79] [SelectionDAG] Use SDNode::op_iterator instead of
 SDNodeIterator. NFC (#122147)

I think SDNodeIterator primarily exists because GraphTraits requires an
iterator that dereferences to SDNode*. op_iterator dereferences to
SDUse* which is implicitly convertible to SDValue.

This piece of code can use SDValue instead of SDNode* so we should
prefer to use the the more common op_iterator.
---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9f57884eae04d..56194e2614af2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2985,13 +2985,11 @@ bool TargetLowering::SimplifyDemandedBits(
   if (!isTargetCanonicalConstantNode(Op) &&
       DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
     // Avoid folding to a constant if any OpaqueConstant is involved.
-    const SDNode *N = Op.getNode();
-    for (SDNode *Op :
-         llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
-      if (auto *C = dyn_cast<ConstantSDNode>(Op))
-        if (C->isOpaque())
-          return false;
-    }
+    if (llvm::any_of(Op->ops(), [](SDValue V) {
+          auto *C = dyn_cast<ConstantSDNode>(V);
+          return C && C->isOpaque();
+        }))
+      return false;
     if (VT.isInteger())
       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
     if (VT.isFloatingPoint())

From c87ef146e1ceea3ebfcd0f59266043d53affced0 Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim@gmail.com>
Date: Thu, 9 Jan 2025 12:15:20 -0500
Subject: [PATCH 12/79] [InstCombine][NFC] Precommit a test for folding a
 binary op of reductions. (#121568)

---
 .../InstCombine/fold-binop-of-reductions.ll   | 215 ++++++++++++++++++
 1 file changed, 215 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll

diff --git a/llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll b/llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll
new file mode 100644
index 0000000000000..86f17cdfb79b4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-binop-of-reductions.ll
@@ -0,0 +1,215 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i32 @add_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) {
+; CHECK-LABEL: define i32 @add_of_reduce_add(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
+  %res = add i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @sub_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) {
+; CHECK-LABEL: define i32 @sub_of_reduce_add(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <16 x i32> [[V0]], [[V1]]
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
+  %res = sub i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @mul_of_reduce_mul(<16 x i32> %v0, <16 x i32> %v1) {
+; CHECK-LABEL: define i32 @mul_of_reduce_mul(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = mul i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1)
+  %res = mul i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @and_of_reduce_and(<16 x i32> %v0, <16 x i32> %v1) {
+; CHECK-LABEL: define i32 @and_of_reduce_and(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = and i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %v1)
+  %res = and i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @or_of_reduce_or(<16 x i32> %v0, <16 x i32> %v1) {
+; CHECK-LABEL: define i32 @or_of_reduce_or(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = or i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v1)
+  %res = or i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @xor_of_reduce_xor(<16 x i32> %v0, <16 x i32> %v1) {
+; CHECK-LABEL: define i32 @xor_of_reduce_xor(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = xor i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %v1)
+  %res = xor i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @reduction_does_not_match_binop(<16 x i32> %v0, <16 x i32> %v1) {
+; CHECK-LABEL: define i32 @reduction_does_not_match_binop(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1)
+  %res = add i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @intrinsics_do_not_match(<16 x i32> %v0, <16 x i32> %v1) {
+; CHECK-LABEL: define i32 @intrinsics_do_not_match(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %v1)
+  %res = add i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @element_counts_do_not_match(<16 x i32> %v0, <8 x i32> %v1) {
+; CHECK-LABEL: define i32 @element_counts_do_not_match(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v1)
+  %res = add i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @multiple_use_of_reduction_0(<16 x i32> %v0, <16 x i32> %v1, ptr %p) {
+; CHECK-LABEL: define i32 @multiple_use_of_reduction_0(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    store i32 [[V0_RED]], ptr [[P]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
+  %res = add i32 %v0_red, %v1_red
+  store i32 %v0_red, ptr %p
+  ret i32 %res
+}
+
+define i32 @multiple_use_of_reduction_1(<16 x i32> %v0, <16 x i32> %v1, ptr %p) {
+; CHECK-LABEL: define i32 @multiple_use_of_reduction_1(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    store i32 [[V1_RED]], ptr [[P]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
+  %res = add i32 %v0_red, %v1_red
+  store i32 %v1_red, ptr %p
+  ret i32 %res
+}
+
+define i32 @do_not_preserve_overflow_flags(<16 x i32> %v0, <16 x i32> %v1) {
+; CHECK-LABEL: define i32 @do_not_preserve_overflow_flags(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
+  %res = add nsw nuw i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @preserve_disjoint_flags(<16 x i32> %v0, <16 x i32> %v1) {
+; CHECK-LABEL: define i32 @preserve_disjoint_flags(
+; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = or disjoint i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %v1)
+  %res = or disjoint i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @add_of_reduce_add_vscale(<vscale x 16 x i32> %v0, <vscale x 16 x i32> %v1) {
+; CHECK-LABEL: define i32 @add_of_reduce_add_vscale(
+; CHECK-SAME: <vscale x 16 x i32> [[V0:%.*]], <vscale x 16 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %v1)
+  %res = add i32 %v0_red, %v1_red
+  ret i32 %res
+}
+
+define i32 @element_counts_do_not_match_vscale(<vscale x 16 x i32> %v0, <vscale x 8 x i32> %v1) {
+; CHECK-LABEL: define i32 @element_counts_do_not_match_vscale(
+; CHECK-SAME: <vscale x 16 x i32> [[V0:%.*]], <vscale x 8 x i32> [[V1:%.*]]) {
+; CHECK-NEXT:    [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[V0]])
+; CHECK-NEXT:    [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[V1]])
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %v0_red = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %v0)
+  %v1_red = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 8 x i32> %v1)
+  %res = add i32 %v0_red, %v1_red
+  ret i32 %res
+}

From f8f8598fd886cddfd374fa43eb6d7d37d301b576 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu@sifive.com>
Date: Thu, 9 Jan 2025 09:25:51 -0800
Subject: [PATCH 13/79] [Exegesis] Add the ability to dry-run the measurement
 phase (#121991)

With the new benchmark phase, `dry-run-measurement`, llvm-exegesis can
run everything except the actual snippet execution. It is useful when we
want to test some parts of the code between the `assemble-measured-code`
and `measure` phase without actually running on native platforms.
---
 llvm/docs/CommandGuide/llvm-exegesis.rst      |  1 +
 .../llvm-exegesis/dry-run-measurement.test    | 11 +++++++
 .../tools/llvm-exegesis/lib/BenchmarkResult.h |  1 +
 .../llvm-exegesis/lib/BenchmarkRunner.cpp     | 33 ++++++++++++++-----
 llvm/tools/llvm-exegesis/lib/Target.cpp       |  4 +--
 llvm/tools/llvm-exegesis/llvm-exegesis.cpp    |  9 +++--
 6 files changed, 46 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/tools/llvm-exegesis/dry-run-measurement.test

diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst
index 8266d891a5e6b..d357c2ceea418 100644
--- a/llvm/docs/CommandGuide/llvm-exegesis.rst
+++ b/llvm/docs/CommandGuide/llvm-exegesis.rst
@@ -301,6 +301,7 @@ OPTIONS
   * ``prepare-and-assemble-snippet``: Same as ``prepare-snippet``, but also dumps an excerpt of the sequence (hex encoded).
   * ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``. but also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``.
   * ``measure``: Same as ``assemble-measured-code``, but also runs the measurement.
+  * ``dry-run-measurement``: Same as measure, but does not actually execute the snippet.
 
 .. option:: --x86-lbr-sample-period=<nBranches/sample>
 
diff --git a/llvm/test/tools/llvm-exegesis/dry-run-measurement.test b/llvm/test/tools/llvm-exegesis/dry-run-measurement.test
new file mode 100644
index 0000000000000..e4449d7df3d82
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/dry-run-measurement.test
@@ -0,0 +1,11 @@
+# RUN: llvm-exegesis --mtriple=riscv64 --mcpu=sifive-p470 --mode=latency --opcode-name=ADD --use-dummy-perf-counters --benchmark-phase=dry-run-measurement | FileCheck %s
+# REQUIRES: riscv-registered-target
+
+# This test makes sure that llvm-exegesis doesn't execute "cross-compiled" snippets in the presence of
+# --dry-run-measurement. RISC-V was chosen simply because most of the time we run tests on X86 machines.
+
+# Should not contain misleading results.
+# CHECK: measurements:    []
+
+# Should not contain error messages like "snippet crashed while running: Segmentation fault".
+# CHECK: error:           ''
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
index 3c09a8380146e..5480d85616878 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
@@ -38,6 +38,7 @@ enum class BenchmarkPhaseSelectorE {
   PrepareAndAssembleSnippet,
   AssembleMeasuredCode,
   Measure,
+  DryRunMeasure,
 };
 
 enum class BenchmarkFilter { All, RegOnly, WithMem };
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index a7771b99e97b1..cc46f7feb6cf7 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -99,7 +99,7 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
   static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
   create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
          BenchmarkRunner::ScratchSpace *Scratch,
-         std::optional<int> BenchmarkProcessCPU) {
+         std::optional<int> BenchmarkProcessCPU, bool DryRun) {
     Expected<ExecutableFunction> EF =
         ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
 
@@ -107,14 +107,17 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
       return EF.takeError();
 
     return std::unique_ptr<InProcessFunctionExecutorImpl>(
-        new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
+        new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch,
+                                          DryRun));
   }
 
 private:
   InProcessFunctionExecutorImpl(const LLVMState &State,
                                 ExecutableFunction Function,
-                                BenchmarkRunner::ScratchSpace *Scratch)
-      : State(State), Function(std::move(Function)), Scratch(Scratch) {}
+                                BenchmarkRunner::ScratchSpace *Scratch,
+                                bool DryRun)
+      : State(State), Function(std::move(Function)), Scratch(Scratch),
+        DryRun(DryRun) {}
 
   static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues,
                                       SmallVector<int64_t, 4> *Result) {
@@ -143,9 +146,14 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
       CrashRecoveryContext CRC;
       CrashRecoveryContext::Enable();
       const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() {
-        Counter->start();
-        this->Function(ScratchPtr);
-        Counter->stop();
+        if (DryRun) {
+          Counter->start();
+          Counter->stop();
+        } else {
+          Counter->start();
+          this->Function(ScratchPtr);
+          Counter->stop();
+        }
       });
       CrashRecoveryContext::Disable();
       PS.reset();
@@ -177,6 +185,7 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
   const LLVMState &State;
   const ExecutableFunction Function;
   BenchmarkRunner::ScratchSpace *const Scratch;
+  bool DryRun = false;
 };
 
 #ifdef __linux__
@@ -664,6 +673,9 @@ Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
 BenchmarkRunner::createFunctionExecutor(
     object::OwningBinary<object::ObjectFile> ObjectFile,
     const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const {
+  bool DryRun =
+      BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::DryRunMeasure;
+
   switch (ExecutionMode) {
   case ExecutionModeE::InProcess: {
     if (BenchmarkProcessCPU.has_value())
@@ -671,7 +683,8 @@ BenchmarkRunner::createFunctionExecutor(
                                  "support benchmark core pinning.");
 
     auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
-        State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU);
+        State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU,
+        DryRun);
     if (!InProcessExecutorOrErr)
       return InProcessExecutorOrErr.takeError();
 
@@ -679,6 +692,10 @@ BenchmarkRunner::createFunctionExecutor(
   }
   case ExecutionModeE::SubProcess: {
 #ifdef __linux__
+    if (DryRun)
+      return make_error<Failure>("The subprocess execution mode cannot "
+                                 "dry-run measurement at this moment.");
+
     auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
         State, std::move(ObjectFile), Key, BenchmarkProcessCPU);
     if (!SubProcessExecutorOrErr)
diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp
index 29e58692f0e92..e2251ff978888 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Target.cpp
@@ -98,7 +98,7 @@ ExegesisTarget::createBenchmarkRunner(
     return nullptr;
   case Benchmark::Latency:
   case Benchmark::InverseThroughput:
-    if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
+    if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure &&
         !PfmCounters.CycleCounter) {
       const char *ModeName = Mode == Benchmark::Latency
                                  ? "latency"
@@ -116,7 +116,7 @@ ExegesisTarget::createBenchmarkRunner(
         State, Mode, BenchmarkPhaseSelector, ResultAggMode, ExecutionMode,
         ValidationCounters, BenchmarkRepeatCount);
   case Benchmark::Uops:
-    if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
+    if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure &&
         !PfmCounters.UopsCounter && !PfmCounters.IssueCounters)
       return make_error<Failure>(
           "can't run 'uops' mode, sched model does not define uops or issue "
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index fa37e05956be8..07bd44ee64f1f 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -132,7 +132,10 @@ static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
         clEnumValN(
             BenchmarkPhaseSelectorE::Measure, "measure",
             "Same as prepare-measured-code, but also runs the measurement "
-            "(default)")),
+            "(default)"),
+        clEnumValN(
+            BenchmarkPhaseSelectorE::DryRunMeasure, "dry-run-measurement",
+            "Same as measure, but does not actually execute the snippet")),
     cl::init(BenchmarkPhaseSelectorE::Measure));
 
 static cl::opt<bool>
@@ -476,7 +479,7 @@ static void runBenchmarkConfigurations(
 }
 
 void benchmarkMain() {
-  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
+  if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure &&
       !UseDummyPerfCounters) {
 #ifndef HAVE_LIBPFM
     ExitWithError(
@@ -501,7 +504,7 @@ void benchmarkMain() {
 
   // Preliminary check to ensure features needed for requested
   // benchmark mode are present on target CPU and/or OS.
-  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
+  if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure)
     ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
 
   if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess &&

From be32621ce8cbffe674c62e87c0c51c9fc4a21e5f Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane@nvidia.com>
Date: Thu, 9 Jan 2025 07:15:05 -0800
Subject: [PATCH 14/79] [OpenACC] Implement 'device' and 'host' clauses for
 'update'

These two clauses just take a 'var-list' and specify where the variables
should be copied from/to.  This patch implements the AST nodes for them
and ensures they properly take a var-list.
---
 clang/include/clang/AST/OpenACCClause.h       | 46 +++++++++++
 clang/include/clang/Basic/OpenACCClauses.def  |  2 +
 clang/include/clang/Sema/SemaOpenACC.h        |  6 ++
 clang/lib/AST/OpenACCClause.cpp               | 38 +++++++++-
 clang/lib/AST/StmtProfile.cpp                 |  9 +++
 clang/lib/AST/TextNodeDumper.cpp              |  2 +
 clang/lib/Parse/ParseOpenACC.cpp              |  7 +-
 clang/lib/Sema/SemaOpenACC.cpp                | 38 ++++++++++
 clang/lib/Sema/TreeTransform.h                | 24 ++++++
 clang/lib/Serialization/ASTReader.cpp         | 14 +++-
 clang/lib/Serialization/ASTWriter.cpp         | 14 +++-
 .../ast-print-openacc-update-construct.cpp    |  6 ++
 clang/test/ParserOpenACC/parse-clauses.c      | 16 ++--
 ...d-construct-auto_seq_independent-clauses.c | 24 +++---
 .../combined-construct-device_type-clause.c   |  6 +-
 .../compute-construct-device_type-clause.c    |  6 +-
 ...p-construct-auto_seq_independent-clauses.c | 24 +++---
 .../loop-construct-device_type-clause.c       |  6 +-
 .../test/SemaOpenACC/update-construct-ast.cpp | 76 +++++++++++++++++++
 clang/test/SemaOpenACC/update-construct.cpp   | 40 ++++++----
 clang/tools/libclang/CIndex.cpp               |  8 ++
 21 files changed, 345 insertions(+), 67 deletions(-)

diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h
index 4e4dd3447926e..f5be54bdada8b 100644
--- a/clang/include/clang/AST/OpenACCClause.h
+++ b/clang/include/clang/AST/OpenACCClause.h
@@ -965,6 +965,52 @@ class OpenACCPresentClause final
   Create(const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc,
          ArrayRef<Expr *> VarList, SourceLocation EndLoc);
 };
+class OpenACCHostClause final
+    : public OpenACCClauseWithVarList,
+      private llvm::TrailingObjects<OpenACCHostClause, Expr *> {
+  friend TrailingObjects;
+
+  OpenACCHostClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
+                    ArrayRef<Expr *> VarList, SourceLocation EndLoc)
+      : OpenACCClauseWithVarList(OpenACCClauseKind::Host, BeginLoc, LParenLoc,
+                                 EndLoc) {
+    std::uninitialized_copy(VarList.begin(), VarList.end(),
+                            getTrailingObjects<Expr *>());
+    setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
+  }
+
+public:
+  static bool classof(const OpenACCClause *C) {
+    return C->getClauseKind() == OpenACCClauseKind::Host;
+  }
+  static OpenACCHostClause *Create(const ASTContext &C, SourceLocation BeginLoc,
+                                   SourceLocation LParenLoc,
+                                   ArrayRef<Expr *> VarList,
+                                   SourceLocation EndLoc);
+};
+
+class OpenACCDeviceClause final
+    : public OpenACCClauseWithVarList,
+      private llvm::TrailingObjects<OpenACCDeviceClause, Expr *> {
+  friend TrailingObjects;
+
+  OpenACCDeviceClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
+                      ArrayRef<Expr *> VarList, SourceLocation EndLoc)
+      : OpenACCClauseWithVarList(OpenACCClauseKind::Device, BeginLoc, LParenLoc,
+                                 EndLoc) {
+    std::uninitialized_copy(VarList.begin(), VarList.end(),
+                            getTrailingObjects<Expr *>());
+    setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
+  }
+
+public:
+  static bool classof(const OpenACCClause *C) {
+    return C->getClauseKind() == OpenACCClauseKind::Device;
+  }
+  static OpenACCDeviceClause *
+  Create(const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc,
+         ArrayRef<Expr *> VarList, SourceLocation EndLoc);
+};
 
 class OpenACCCopyClause final
     : public OpenACCClauseWithVarList,
diff --git a/clang/include/clang/Basic/OpenACCClauses.def b/clang/include/clang/Basic/OpenACCClauses.def
index d6fb015c64913..8b15007c85557 100644
--- a/clang/include/clang/Basic/OpenACCClauses.def
+++ b/clang/include/clang/Basic/OpenACCClauses.def
@@ -41,6 +41,7 @@ VISIT_CLAUSE(Default)
 VISIT_CLAUSE(DefaultAsync)
 VISIT_CLAUSE(Delete)
 VISIT_CLAUSE(Detach)
+VISIT_CLAUSE(Device)
 VISIT_CLAUSE(DeviceNum)
 VISIT_CLAUSE(DevicePtr)
 VISIT_CLAUSE(DeviceType)
@@ -48,6 +49,7 @@ CLAUSE_ALIAS(DType, DeviceType, false)
 VISIT_CLAUSE(Finalize)
 VISIT_CLAUSE(FirstPrivate)
 VISIT_CLAUSE(Gang)
+VISIT_CLAUSE(Host)
 VISIT_CLAUSE(If)
 VISIT_CLAUSE(IfPresent)
 VISIT_CLAUSE(Independent)
diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h
index 0f86d46bc9802..2e5a0ea0aaac6 100644
--- a/clang/include/clang/Sema/SemaOpenACC.h
+++ b/clang/include/clang/Sema/SemaOpenACC.h
@@ -409,6 +409,8 @@ class SemaOpenACC : public SemaBase {
               ClauseKind == OpenACCClauseKind::Detach ||
               ClauseKind == OpenACCClauseKind::DevicePtr ||
               ClauseKind == OpenACCClauseKind::Reduction ||
+              ClauseKind == OpenACCClauseKind::Host ||
+              ClauseKind == OpenACCClauseKind::Device ||
               (ClauseKind == OpenACCClauseKind::Self &&
                DirKind == OpenACCDirectiveKind::Update) ||
               ClauseKind == OpenACCClauseKind::FirstPrivate) &&
@@ -553,6 +555,8 @@ class SemaOpenACC : public SemaBase {
               ClauseKind == OpenACCClauseKind::UseDevice ||
               ClauseKind == OpenACCClauseKind::Detach ||
               ClauseKind == OpenACCClauseKind::DevicePtr ||
+              ClauseKind == OpenACCClauseKind::Host ||
+              ClauseKind == OpenACCClauseKind::Device ||
               (ClauseKind == OpenACCClauseKind::Self &&
                DirKind == OpenACCDirectiveKind::Update) ||
               ClauseKind == OpenACCClauseKind::FirstPrivate) &&
@@ -594,6 +598,8 @@ class SemaOpenACC : public SemaBase {
               ClauseKind == OpenACCClauseKind::UseDevice ||
               ClauseKind == OpenACCClauseKind::Detach ||
               ClauseKind == OpenACCClauseKind::DevicePtr ||
+              ClauseKind == OpenACCClauseKind::Host ||
+              ClauseKind == OpenACCClauseKind::Device ||
               (ClauseKind == OpenACCClauseKind::Self &&
                DirKind == OpenACCDirectiveKind::Update) ||
               ClauseKind == OpenACCClauseKind::FirstPrivate) &&
diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp
index da63b471d9856..aa14ab902ba66 100644
--- a/clang/lib/AST/OpenACCClause.cpp
+++ b/clang/lib/AST/OpenACCClause.cpp
@@ -38,7 +38,9 @@ bool OpenACCClauseWithVarList::classof(const OpenACCClause *C) {
          OpenACCNoCreateClause::classof(C) ||
          OpenACCPresentClause::classof(C) || OpenACCCopyClause::classof(C) ||
          OpenACCCopyInClause::classof(C) || OpenACCCopyOutClause::classof(C) ||
-         OpenACCReductionClause::classof(C) || OpenACCCreateClause::classof(C);
+         OpenACCReductionClause::classof(C) ||
+         OpenACCCreateClause::classof(C) || OpenACCDeviceClause::classof(C) ||
+         OpenACCHostClause::classof(C);
 }
 bool OpenACCClauseWithCondition::classof(const OpenACCClause *C) {
   return OpenACCIfClause::classof(C);
@@ -406,6 +408,26 @@ OpenACCPresentClause *OpenACCPresentClause::Create(const ASTContext &C,
   return new (Mem) OpenACCPresentClause(BeginLoc, LParenLoc, VarList, EndLoc);
 }
 
+OpenACCHostClause *OpenACCHostClause::Create(const ASTContext &C,
+                                             SourceLocation BeginLoc,
+                                             SourceLocation LParenLoc,
+                                             ArrayRef<Expr *> VarList,
+                                             SourceLocation EndLoc) {
+  void *Mem =
+      C.Allocate(OpenACCHostClause::totalSizeToAlloc<Expr *>(VarList.size()));
+  return new (Mem) OpenACCHostClause(BeginLoc, LParenLoc, VarList, EndLoc);
+}
+
+OpenACCDeviceClause *OpenACCDeviceClause::Create(const ASTContext &C,
+                                                 SourceLocation BeginLoc,
+                                                 SourceLocation LParenLoc,
+                                                 ArrayRef<Expr *> VarList,
+                                                 SourceLocation EndLoc) {
+  void *Mem =
+      C.Allocate(OpenACCDeviceClause::totalSizeToAlloc<Expr *>(VarList.size()));
+  return new (Mem) OpenACCDeviceClause(BeginLoc, LParenLoc, VarList, EndLoc);
+}
+
 OpenACCCopyClause *
 OpenACCCopyClause::Create(const ASTContext &C, OpenACCClauseKind Spelling,
                           SourceLocation BeginLoc, SourceLocation LParenLoc,
@@ -711,6 +733,20 @@ void OpenACCClausePrinter::VisitPresentClause(const OpenACCPresentClause &C) {
   OS << ")";
 }
 
+void OpenACCClausePrinter::VisitHostClause(const OpenACCHostClause &C) {
+  OS << "host(";
+  llvm::interleaveComma(C.getVarList(), OS,
+                        [&](const Expr *E) { printExpr(E); });
+  OS << ")";
+}
+
+void OpenACCClausePrinter::VisitDeviceClause(const OpenACCDeviceClause &C) {
+  OS << "device(";
+  llvm::interleaveComma(C.getVarList(), OS,
+                        [&](const Expr *E) { printExpr(E); });
+  OS << ")";
+}
+
 void OpenACCClausePrinter::VisitCopyClause(const OpenACCCopyClause &C) {
   OS << C.getClauseKind() << '(';
   llvm::interleaveComma(C.getVarList(), OS,
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index cd91a7900538b..0f1ebc68a4f76 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2554,6 +2554,15 @@ void OpenACCClauseProfiler::VisitCreateClause(
   VisitClauseWithVarList(Clause);
 }
 
+void OpenACCClauseProfiler::VisitHostClause(const OpenACCHostClause &Clause) {
+  VisitClauseWithVarList(Clause);
+}
+
+void OpenACCClauseProfiler::VisitDeviceClause(
+    const OpenACCDeviceClause &Clause) {
+  VisitClauseWithVarList(Clause);
+}
+
 void OpenACCClauseProfiler::VisitSelfClause(const OpenACCSelfClause &Clause) {
   if (Clause.isConditionExprClause()) {
     if (Clause.hasConditionExpr())
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index eedd8faad9e85..670641242cae2 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -408,11 +408,13 @@ void TextNodeDumper::Visit(const OpenACCClause *C) {
     case OpenACCClauseKind::Copy:
     case OpenACCClauseKind::PCopy:
     case OpenACCClauseKind::PresentOrCopy:
+    case OpenACCClauseKind::Host:
     case OpenACCClauseKind::If:
     case OpenACCClauseKind::IfPresent:
     case OpenACCClauseKind::Independent:
     case OpenACCClauseKind::Detach:
     case OpenACCClauseKind::Delete:
+    case OpenACCClauseKind::Device:
     case OpenACCClauseKind::DeviceNum:
     case OpenACCClauseKind::DefaultAsync:
     case OpenACCClauseKind::DevicePtr:
diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp
index c79ba97a20077..98fd61913e5a4 100644
--- a/clang/lib/Parse/ParseOpenACC.cpp
+++ b/clang/lib/Parse/ParseOpenACC.cpp
@@ -1000,15 +1000,16 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams(
     }
     case OpenACCClauseKind::Self:
       // The 'self' clause is a var-list instead of a 'condition' in the case of
-      // the 'update' clause, so we have to handle it here.  U se an assert to
+      // the 'update' clause, so we have to handle it here.  Use an assert to
       // make sure we get the right differentiator.
       assert(DirKind == OpenACCDirectiveKind::Update);
+      [[fallthrough]];
+    case OpenACCClauseKind::Device:
+    case OpenACCClauseKind::Host:
       ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind),
                                      /*IsReadOnly=*/false, /*IsZero=*/false);
       break;
-    case OpenACCClauseKind::Device:
     case OpenACCClauseKind::DeviceResident:
-    case OpenACCClauseKind::Host:
     case OpenACCClauseKind::Link:
       ParseOpenACCVarList(ClauseKind);
       break;
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index 51a95f99f0624..a2973f1f99b2c 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -471,6 +471,22 @@ bool doesClauseApplyToDirective(OpenACCDirectiveKind DirectiveKind,
       return false;
     }
   }
+  case OpenACCClauseKind::Device: {
+    switch (DirectiveKind) {
+    case OpenACCDirectiveKind::Update:
+      return true;
+    default:
+      return false;
+    }
+  }
+  case OpenACCClauseKind::Host: {
+    switch (DirectiveKind) {
+    case OpenACCDirectiveKind::Update:
+      return true;
+    default:
+      return false;
+    }
+  }
   }
 
   default:
@@ -1040,6 +1056,28 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitPresentClause(
                                       Clause.getVarList(), Clause.getEndLoc());
 }
 
+OpenACCClause *SemaOpenACCClauseVisitor::VisitHostClause(
+    SemaOpenACC::OpenACCParsedClause &Clause) {
+  // ActOnVar ensured that everything is a valid variable reference, so there
+  // really isn't anything to do here. GCC does some duplicate-finding, though
+  // it isn't apparent in the standard where this is justified.
+
+  return OpenACCHostClause::Create(Ctx, Clause.getBeginLoc(),
+                                   Clause.getLParenLoc(), Clause.getVarList(),
+                                   Clause.getEndLoc());
+}
+
+OpenACCClause *SemaOpenACCClauseVisitor::VisitDeviceClause(
+    SemaOpenACC::OpenACCParsedClause &Clause) {
+  // ActOnVar ensured that everything is a valid variable reference, so there
+  // really isn't anything to do here. GCC does some duplicate-finding, though
+  // it isn't apparent in the standard where this is justified.
+
+  return OpenACCDeviceClause::Create(Ctx, Clause.getBeginLoc(),
+                                     Clause.getLParenLoc(), Clause.getVarList(),
+                                     Clause.getEndLoc());
+}
+
 OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyClause(
     SemaOpenACC::OpenACCParsedClause &Clause) {
   // Restrictions only properly implemented on 'compute'/'combined'/'data'
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index d00ad5a35e823..4a3c739ecbeab 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -11730,6 +11730,30 @@ void OpenACCClauseTransform<Derived>::VisitPrivateClause(
       ParsedClause.getEndLoc());
 }
 
+template <typename Derived>
+void OpenACCClauseTransform<Derived>::VisitHostClause(
+    const OpenACCHostClause &C) {
+  ParsedClause.setVarListDetails(VisitVarList(C.getVarList()),
+                                 /*IsReadOnly=*/false, /*IsZero=*/false);
+
+  NewClause = OpenACCHostClause::Create(
+      Self.getSema().getASTContext(), ParsedClause.getBeginLoc(),
+      ParsedClause.getLParenLoc(), ParsedClause.getVarList(),
+      ParsedClause.getEndLoc());
+}
+
+template <typename Derived>
+void OpenACCClauseTransform<Derived>::VisitDeviceClause(
+    const OpenACCDeviceClause &C) {
+  ParsedClause.setVarListDetails(VisitVarList(C.getVarList()),
+                                 /*IsReadOnly=*/false, /*IsZero=*/false);
+
+  NewClause = OpenACCDeviceClause::Create(
+      Self.getSema().getASTContext(), ParsedClause.getBeginLoc(),
+      ParsedClause.getLParenLoc(), ParsedClause.getVarList(),
+      ParsedClause.getEndLoc());
+}
+
 template <typename Derived>
 void OpenACCClauseTransform<Derived>::VisitFirstPrivateClause(
     const OpenACCFirstPrivateClause &C) {
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0368990ca150d..b53f99732cacc 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -12439,6 +12439,18 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() {
     return OpenACCPrivateClause::Create(getContext(), BeginLoc, LParenLoc,
                                         VarList, EndLoc);
   }
+  case OpenACCClauseKind::Host: {
+    SourceLocation LParenLoc = readSourceLocation();
+    llvm::SmallVector<Expr *> VarList = readOpenACCVarList();
+    return OpenACCHostClause::Create(getContext(), BeginLoc, LParenLoc, VarList,
+                                     EndLoc);
+  }
+  case OpenACCClauseKind::Device: {
+    SourceLocation LParenLoc = readSourceLocation();
+    llvm::SmallVector<Expr *> VarList = readOpenACCVarList();
+    return OpenACCDeviceClause::Create(getContext(), BeginLoc, LParenLoc,
+                                       VarList, EndLoc);
+  }
   case OpenACCClauseKind::FirstPrivate: {
     SourceLocation LParenLoc = readSourceLocation();
     llvm::SmallVector<Expr *> VarList = readOpenACCVarList();
@@ -12611,9 +12623,7 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() {
   }
 
   case OpenACCClauseKind::NoHost:
-  case OpenACCClauseKind::Device:
   case OpenACCClauseKind::DeviceResident:
-  case OpenACCClauseKind::Host:
   case OpenACCClauseKind::Link:
   case OpenACCClauseKind::Bind:
   case OpenACCClauseKind::Invalid:
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 8d9396e28ed50..39004fd4d4c37 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -8371,6 +8371,18 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) {
     writeOpenACCVarList(PC);
     return;
   }
+  case OpenACCClauseKind::Host: {
+    const auto *HC = cast<OpenACCHostClause>(C);
+    writeSourceLocation(HC->getLParenLoc());
+    writeOpenACCVarList(HC);
+    return;
+  }
+  case OpenACCClauseKind::Device: {
+    const auto *DC = cast<OpenACCDeviceClause>(C);
+    writeSourceLocation(DC->getLParenLoc());
+    writeOpenACCVarList(DC);
+    return;
+  }
   case OpenACCClauseKind::FirstPrivate: {
     const auto *FPC = cast<OpenACCFirstPrivateClause>(C);
     writeSourceLocation(FPC->getLParenLoc());
@@ -8544,9 +8556,7 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) {
   }
 
   case OpenACCClauseKind::NoHost:
-  case OpenACCClauseKind::Device:
   case OpenACCClauseKind::DeviceResident:
-  case OpenACCClauseKind::Host:
   case OpenACCClauseKind::Link:
   case OpenACCClauseKind::Bind:
   case OpenACCClauseKind::Invalid:
diff --git a/clang/test/AST/ast-print-openacc-update-construct.cpp b/clang/test/AST/ast-print-openacc-update-construct.cpp
index a7f5b2a42285b..0d09c829929dc 100644
--- a/clang/test/AST/ast-print-openacc-update-construct.cpp
+++ b/clang/test/AST/ast-print-openacc-update-construct.cpp
@@ -38,4 +38,10 @@ void uses(bool cond) {
 
 // CHECK: #pragma acc update self(I, iPtr, array, array[1], array[1:2])
 #pragma acc update self(I, iPtr, array, array[1], array[1:2])
+
+// CHECK: #pragma acc update host(I, iPtr, array, array[1], array[1:2])
+#pragma acc update host (I, iPtr, array, array[1], array[1:2])
+
+// CHECK: #pragma acc update device(I, iPtr, array, array[1], array[1:2])
+#pragma acc update device(I, iPtr, array, array[1], array[1:2])
 }
diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c
index 73a09697710f9..930cc1e4c0353 100644
--- a/clang/test/ParserOpenACC/parse-clauses.c
+++ b/clang/test/ParserOpenACC/parse-clauses.c
@@ -538,22 +538,18 @@ void VarListClauses() {
 #pragma acc serial link(s.array[s.value : 5], s.value), self
   for(int i = 0; i < 5;++i) {}
 
-  // expected-error@+2{{expected ','}}
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented, clause ignored}}
-#pragma acc serial host(s.array[s.value] s.array[s.value :5] ), self
+  // expected-error@+1{{expected ','}}
+#pragma acc update host(s.array[s.value] s.array[s.value :5] )
   for(int i = 0; i < 5;++i) {}
 
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented, clause ignored}}
-#pragma acc serial host(s.array[s.value : 5], s.value), self
+#pragma acc update host(s.array[s.value : 5], s.value)
   for(int i = 0; i < 5;++i) {}
 
-  // expected-error@+2{{expected ','}}
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented, clause ignored}}
-#pragma acc serial device(s.array[s.value] s.array[s.value :5] ), self
+  // expected-error@+1{{expected ','}}
+#pragma acc update device(s.array[s.value] s.array[s.value :5] )
   for(int i = 0; i < 5;++i) {}
 
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented, clause ignored}}
-#pragma acc serial device(s.array[s.value : 5], s.value), self
+#pragma acc update device(s.array[s.value : 5], s.value)
   for(int i = 0; i < 5;++i) {}
 
   // expected-error@+1{{expected ','}}
diff --git a/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c b/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c
index 9a98f5e1659b8..9e74ce27ffbd9 100644
--- a/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c
+++ b/clang/test/SemaOpenACC/combined-construct-auto_seq_independent-clauses.c
@@ -75,7 +75,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop auto detach(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop auto device(VarPtr)
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop auto deviceptr(VarPtr)
@@ -85,7 +85,7 @@ void uses() {
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop auto firstprivate(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop auto host(Var)
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
@@ -192,7 +192,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop detach(Var) auto
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop device(VarPtr) auto
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop deviceptr(VarPtr) auto
@@ -202,7 +202,7 @@ void uses() {
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop firstprivate(Var) auto
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop host(Var) auto
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
@@ -310,7 +310,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop independent detach(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop independent device(VarPtr)
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop independent deviceptr(VarPtr)
@@ -320,7 +320,7 @@ void uses() {
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop independent firstprivate(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop independent host(Var)
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
@@ -427,7 +427,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop detach(Var) independent
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop device(VarPtr) independent
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop deviceptr(VarPtr) independent
@@ -437,7 +437,7 @@ void uses() {
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop firstprivate(Var) independent
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop host(Var) independent
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
@@ -553,7 +553,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop seq detach(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop seq device(VarPtr)
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop seq deviceptr(VarPtr)
@@ -563,7 +563,7 @@ void uses() {
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop seq firstprivate(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop seq host(Var)
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
@@ -676,7 +676,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop detach(Var) seq
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop device(VarPtr) seq
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop deviceptr(VarPtr) seq
@@ -686,7 +686,7 @@ void uses() {
   for(unsigned i = 0; i < 5; ++i);
 #pragma acc parallel loop firstprivate(Var) seq
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop host(Var) seq
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
diff --git a/clang/test/SemaOpenACC/combined-construct-device_type-clause.c b/clang/test/SemaOpenACC/combined-construct-device_type-clause.c
index 8072ae7de4ffc..c185f607548ff 100644
--- a/clang/test/SemaOpenACC/combined-construct-device_type-clause.c
+++ b/clang/test/SemaOpenACC/combined-construct-device_type-clause.c
@@ -100,8 +100,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'kernels loop' directive}}
 #pragma acc kernels loop device_type(*) detach(Var)
   for(int i = 0; i < 5; ++i);
-  // expected-error@+2{{OpenACC clause 'device' may not follow a 'device_type' clause in a 'parallel loop' construct}}
-  // expected-note@+1{{previous clause is here}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'parallel loop' directive}}
 #pragma acc parallel loop device_type(*) device(VarPtr)
   for(int i = 0; i < 5; ++i);
   // expected-error@+2{{OpenACC clause 'deviceptr' may not follow a 'device_type' clause in a 'serial loop' construct}}
@@ -116,8 +115,7 @@ void uses() {
   // expected-note@+1{{previous clause is here}}
 #pragma acc parallel loop device_type(*) firstprivate(Var)
   for(int i = 0; i < 5; ++i);
-  // expected-error@+2{{OpenACC clause 'host' may not follow a 'device_type' clause in a 'serial loop' construct}}
-  // expected-note@+1{{previous clause is here}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'serial loop' directive}}
 #pragma acc serial loop device_type(*) host(Var)
   for(int i = 0; i < 5; ++i);
   // expected-error@+2{{OpenACC clause 'link' may not follow a 'device_type' clause in a 'parallel loop' construct}}
diff --git a/clang/test/SemaOpenACC/compute-construct-device_type-clause.c b/clang/test/SemaOpenACC/compute-construct-device_type-clause.c
index 33f433ce4c519..4290fb7665685 100644
--- a/clang/test/SemaOpenACC/compute-construct-device_type-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-device_type-clause.c
@@ -106,8 +106,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'kernels' directive}}
 #pragma acc kernels device_type(*) detach(Var)
   while(1);
-  // expected-error@+2{{OpenACC clause 'device' may not follow a 'device_type' clause in a 'kernels' construct}}
-  // expected-note@+1{{previous clause is here}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'kernels' directive}}
 #pragma acc kernels device_type(*) device(VarPtr)
   while(1);
   // expected-error@+2{{OpenACC clause 'deviceptr' may not follow a 'device_type' clause in a 'kernels' construct}}
@@ -122,8 +121,7 @@ void uses() {
   // expected-note@+1{{previous clause is here}}
 #pragma acc parallel device_type(*) firstprivate(Var)
   while(1);
-  // expected-error@+2{{OpenACC clause 'host' may not follow a 'device_type' clause in a 'kernels' construct}}
-  // expected-note@+1{{previous clause is here}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'kernels' directive}}
 #pragma acc kernels device_type(*) host(Var)
   while(1);
   // expected-error@+2{{OpenACC clause 'link' may not follow a 'device_type' clause in a 'kernels' construct}}
diff --git a/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c b/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c
index d9a4c61a81c7d..f56a1267fbad1 100644
--- a/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c
+++ b/clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c
@@ -80,7 +80,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}}
 #pragma acc loop auto detach(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}}
 #pragma acc loop auto device(VarPtr)
   for(unsigned i = 0; i < 5; ++i);
   // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}}
@@ -92,7 +92,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}}
 #pragma acc loop auto firstprivate(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}}
 #pragma acc loop auto host(Var)
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
@@ -214,7 +214,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}}
 #pragma acc loop detach(Var) auto
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}}
 #pragma acc loop device(VarPtr) auto
   for(unsigned i = 0; i < 5; ++i);
   // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}}
@@ -226,7 +226,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}}
 #pragma acc loop firstprivate(Var) auto
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}}
 #pragma acc loop host(Var) auto
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
@@ -349,7 +349,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}}
 #pragma acc loop independent detach(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}}
 #pragma acc loop independent device(VarPtr)
   for(unsigned i = 0; i < 5; ++i);
   // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}}
@@ -361,7 +361,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}}
 #pragma acc loop independent firstprivate(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}}
 #pragma acc loop independent host(Var)
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
@@ -483,7 +483,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}}
 #pragma acc loop detach(Var) independent
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}}
 #pragma acc loop device(VarPtr) independent
   for(unsigned i = 0; i < 5; ++i);
   // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}}
@@ -495,7 +495,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}}
 #pragma acc loop firstprivate(Var) independent
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}}
 #pragma acc loop host(Var) independent
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
@@ -626,7 +626,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}}
 #pragma acc loop seq detach(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}}
 #pragma acc loop seq device(VarPtr)
   for(unsigned i = 0; i < 5; ++i);
   // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}}
@@ -638,7 +638,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}}
 #pragma acc loop seq firstprivate(Var)
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}}
 #pragma acc loop seq host(Var)
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
@@ -766,7 +766,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}}
 #pragma acc loop detach(Var) seq
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}}
 #pragma acc loop device(VarPtr) seq
   for(unsigned i = 0; i < 5; ++i);
   // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}}
@@ -778,7 +778,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}}
 #pragma acc loop firstprivate(Var) seq
   for(unsigned i = 0; i < 5; ++i);
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}}
 #pragma acc loop host(Var) seq
   for(unsigned i = 0; i < 5; ++i);
   // expected-warning@+1{{OpenACC clause 'link' not yet implemented}}
diff --git a/clang/test/SemaOpenACC/loop-construct-device_type-clause.c b/clang/test/SemaOpenACC/loop-construct-device_type-clause.c
index f16f17f5d6810..2c1189bc647d0 100644
--- a/clang/test/SemaOpenACC/loop-construct-device_type-clause.c
+++ b/clang/test/SemaOpenACC/loop-construct-device_type-clause.c
@@ -92,8 +92,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'detach' clause is not valid on 'loop' directive}}
 #pragma acc loop device_type(*) detach(Var)
   for(int i = 0; i < 5; ++i);
-  // expected-error@+2{{OpenACC clause 'device' may not follow a 'device_type' clause in a 'loop' construct}}
-  // expected-note@+1{{previous clause is here}}
+  // expected-error@+1{{OpenACC 'device' clause is not valid on 'loop' directive}}
 #pragma acc loop device_type(*) device(VarPtr)
   for(int i = 0; i < 5; ++i);
   // expected-error@+1{{OpenACC 'deviceptr' clause is not valid on 'loop' directive}}
@@ -106,8 +105,7 @@ void uses() {
   // expected-error@+1{{OpenACC 'firstprivate' clause is not valid on 'loop' directive}}
 #pragma acc loop device_type(*) firstprivate(Var)
   for(int i = 0; i < 5; ++i);
-  // expected-error@+2{{OpenACC clause 'host' may not follow a 'device_type' clause in a 'loop' construct}}
-  // expected-note@+1{{previous clause is here}}
+  // expected-error@+1{{OpenACC 'host' clause is not valid on 'loop' directive}}
 #pragma acc loop device_type(*) host(Var)
   for(int i = 0; i < 5; ++i);
   // expected-error@+2{{OpenACC clause 'link' may not follow a 'device_type' clause in a 'loop' construct}}
diff --git a/clang/test/SemaOpenACC/update-construct-ast.cpp b/clang/test/SemaOpenACC/update-construct-ast.cpp
index 9048e8823f5f5..5582bde7a64c2 100644
--- a/clang/test/SemaOpenACC/update-construct-ast.cpp
+++ b/clang/test/SemaOpenACC/update-construct-ast.cpp
@@ -89,6 +89,33 @@ void NormalFunc() {
   // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
   // CHECK-NEXT: IntegerLiteral{{.*}} 0
   // CHECK-NEXT: IntegerLiteral{{.*}} 1
+
+#pragma acc update host(Global, GlobalArray, GlobalArray[0], GlobalArray[0:1]) device(Global, GlobalArray, GlobalArray[0], GlobalArray[0:1])
+  // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: host clause
+  // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int'
+  // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
+  // CHECK-NEXT: ArraySubscriptExpr{{.*}} 'short' lvalue
+  // CHECK-NEXT: ImplicitCastExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}} 0
+  // CHECK-NEXT: ArraySectionExpr
+  // CHECK-NEXT: ImplicitCastExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}} 0
+  // CHECK-NEXT: IntegerLiteral{{.*}} 1
+  // CHECK-NEXT: device clause
+  // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int'
+  // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
+  // CHECK-NEXT: ArraySubscriptExpr{{.*}} 'short' lvalue
+  // CHECK-NEXT: ImplicitCastExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}} 0
+  // CHECK-NEXT: ArraySectionExpr
+  // CHECK-NEXT: ImplicitCastExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}} 0
+  // CHECK-NEXT: IntegerLiteral{{.*}} 1
 }
 
 template<typename T>
@@ -163,6 +190,29 @@ void TemplFunc(T t) {
   // CHECK-NEXT: IntegerLiteral{{.*}}0
   // CHECK-NEXT: IntegerLiteral{{.*}}1
 
+#pragma acc update host(Local, LocalArray, LocalArray[0], LocalArray[0:1]) device(Local, LocalArray, LocalArray[0], LocalArray[0:1])
+  // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: host clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(T::value)'
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
+  // CHECK-NEXT: ArraySubscriptExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: ArraySectionExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: IntegerLiteral{{.*}}1
+  // CHECK-NEXT: device clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(T::value)'
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
+  // CHECK-NEXT: ArraySubscriptExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: ArraySectionExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: IntegerLiteral{{.*}}1
+
   // Instantiation:
   // CHECK-NEXT: FunctionDecl{{.*}} TemplFunc 'void (SomeStruct)' implicit_instantiation
   // CHECK-NEXT: TemplateArgument type 'SomeStruct'
@@ -255,6 +305,32 @@ void TemplFunc(T t) {
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
   // CHECK-NEXT: IntegerLiteral{{.*}}0
   // CHECK-NEXT: IntegerLiteral{{.*}}1
+
+  // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: host clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(SomeStruct::value)':'const unsigned int'
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
+  // CHECK-NEXT: ArraySubscriptExpr
+  // CHECK-NEXT: ImplicitCastExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: ArraySectionExpr
+  // CHECK-NEXT: ImplicitCastExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: IntegerLiteral{{.*}}1
+  // CHECK-NEXT: device clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(SomeStruct::value)':'const unsigned int'
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
+  // CHECK-NEXT: ArraySubscriptExpr
+  // CHECK-NEXT: ImplicitCastExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: ArraySectionExpr
+  // CHECK-NEXT: ImplicitCastExpr
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
+  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: IntegerLiteral{{.*}}1
 }
 
 struct SomeStruct{
diff --git a/clang/test/SemaOpenACC/update-construct.cpp b/clang/test/SemaOpenACC/update-construct.cpp
index 2abd7a30eda88..8b65da69e42b9 100644
--- a/clang/test/SemaOpenACC/update-construct.cpp
+++ b/clang/test/SemaOpenACC/update-construct.cpp
@@ -10,9 +10,7 @@ void uses() {
 #pragma acc update if(true) self(Var)
 #pragma acc update if_present self(Var)
 #pragma acc update self(Var)
-  // expected-warning@+1{{OpenACC clause 'host' not yet implemented}}
 #pragma acc update host(Var)
-  // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
 #pragma acc update device(Var)
 
   // expected-error@+2{{OpenACC clause 'if' may not follow a 'device_type' clause in a 'update' construct}}
@@ -56,35 +54,29 @@ void uses() {
 
   // Cannot be the body of an 'if', 'while', 'do', 'switch', or
   // 'label'.
-  // expected-error@+3{{OpenACC 'update' construct may not appear in place of the statement following an if statement}}
+  // expected-error@+2{{OpenACC 'update' construct may not appear in place of the statement following an if statement}}
   if (true)
-    // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
 #pragma acc update device(Var)
 
-  // expected-error@+3{{OpenACC 'update' construct may not appear in place of the statement following a while statement}}
+  // expected-error@+2{{OpenACC 'update' construct may not appear in place of the statement following a while statement}}
   while (true)
-    // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
 #pragma acc update device(Var)
 
-  // expected-error@+3{{OpenACC 'update' construct may not appear in place of the statement following a do statement}}
+  // expected-error@+2{{OpenACC 'update' construct may not appear in place of the statement following a do statement}}
   do
-    // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
 #pragma acc update device(Var)
   while (true);
 
-  // expected-error@+3{{OpenACC 'update' construct may not appear in place of the statement following a switch statement}}
+  // expected-error@+2{{OpenACC 'update' construct may not appear in place of the statement following a switch statement}}
   switch(Var)
-    // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
 #pragma acc update device(Var)
 
-  // expected-error@+3{{OpenACC 'update' construct may not appear in place of the statement following a label statement}}
+  // expected-error@+2{{OpenACC 'update' construct may not appear in place of the statement following a label statement}}
   LABEL:
-    // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
 #pragma acc update device(Var)
 
   // For loops are OK.
   for (;;)
-    // expected-warning@+1{{OpenACC clause 'device' not yet implemented}}
 #pragma acc update device(Var)
 
   // Checking for 'async', which requires an 'int' expression.
@@ -132,11 +124,19 @@ void varlist_restrictions_templ() {
   // Members of a subarray of struct or class type may not appear, but others
   // are permitted to.
 #pragma acc update self(iArray[0:1])
+#pragma acc update host(iArray[0:1])
+#pragma acc update device(iArray[0:1])
 
 #pragma acc update self(Array[0:1])
+#pragma acc update host(Array[0:1])
+#pragma acc update device(Array[0:1])
 
   // expected-error@+1{{OpenACC sub-array is not allowed here}}
 #pragma acc update self(Array[0:1].MemberOfComp)
+  // expected-error@+1{{OpenACC sub-array is not allowed here}}
+#pragma acc update host(Array[0:1].MemberOfComp)
+  // expected-error@+1{{OpenACC sub-array is not allowed here}}
+#pragma acc update device(Array[0:1].MemberOfComp)
 }
 
 void varlist_restrictions() {
@@ -149,19 +149,33 @@ void varlist_restrictions() {
   int *LocalPtr;
 
 #pragma acc update self(LocalInt, LocalPtr, Single)
+#pragma acc update host(LocalInt, LocalPtr, Single)
+#pragma acc update device(LocalInt, LocalPtr, Single)
 
 #pragma acc update self(Single.MemberOfComp)
+#pragma acc update host(Single.MemberOfComp)
+#pragma acc update device(Single.MemberOfComp)
 
 #pragma acc update self(Single.Array[0:1])
+#pragma acc update host(Single.Array[0:1])
+#pragma acc update device(Single.Array[0:1])
 
 
   // Members of a subarray of struct or class type may not appear, but others
   // are permitted to.
 #pragma acc update self(iArray[0:1])
+#pragma acc update host(iArray[0:1])
+#pragma acc update device(iArray[0:1])
 
 #pragma acc update self(Array[0:1])
+#pragma acc update host(Array[0:1])
+#pragma acc update device(Array[0:1])
 
   // expected-error@+1{{OpenACC sub-array is not allowed here}}
 #pragma acc update self(Array[0:1].MemberOfComp)
+  // expected-error@+1{{OpenACC sub-array is not allowed here}}
+#pragma acc update host(Array[0:1].MemberOfComp)
+  // expected-error@+1{{OpenACC sub-array is not allowed here}}
+#pragma acc update device(Array[0:1].MemberOfComp)
 }
 
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 5e51fc4e2f66c..e175aab4499ff 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2877,6 +2877,14 @@ void OpenACCClauseEnqueue::VisitPrivateClause(const OpenACCPrivateClause &C) {
   VisitVarList(C);
 }
 
+void OpenACCClauseEnqueue::VisitHostClause(const OpenACCHostClause &C) {
+  VisitVarList(C);
+}
+
+void OpenACCClauseEnqueue::VisitDeviceClause(const OpenACCDeviceClause &C) {
+  VisitVarList(C);
+}
+
 void OpenACCClauseEnqueue::VisitFirstPrivateClause(
     const OpenACCFirstPrivateClause &C) {
   VisitVarList(C);

From 553fa204ed5ab4c48bc6080451df24310c00e69c Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane@nvidia.com>
Date: Thu, 9 Jan 2025 08:59:31 -0800
Subject: [PATCH 15/79] [OpenACC] Implement 'at least one of' restriction for
 'update'

This completes the implementation of 'update' by implementing its last
restriction. This restriction requires at least 1 of the 'self', 'host',
  or 'device' clauses.
---
 clang/lib/Sema/SemaOpenACC.cpp                |  13 ++-
 .../ast-print-openacc-update-construct.cpp    |  48 ++++----
 clang/test/ParserOpenACC/parse-clauses.c      |   2 +-
 clang/test/ParserOpenACC/parse-constructs.c   |   1 +
 .../test/SemaOpenACC/update-construct-ast.cpp | 104 +++++++++++-------
 clang/test/SemaOpenACC/update-construct.cpp   |  48 ++++----
 6 files changed, 129 insertions(+), 87 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index a2973f1f99b2c..9ded913638fb3 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -3732,8 +3732,17 @@ bool SemaOpenACC::ActOnStartStmtDirective(
                                 OpenACCClauseKind::DeviceType,
                                 OpenACCClauseKind::If});
 
-  // TODO: OpenACC: 'Update' construct needs to have one of 'self', 'host', or
-  // 'device'.  Implement here.
+  // OpenACC3.3 2.14.4: At least one self, host, or device clause must appear on
+  // an update directive.
+  if (K == OpenACCDirectiveKind::Update &&
+      llvm::find_if(Clauses, llvm::IsaPred<OpenACCSelfClause, OpenACCHostClause,
+                                           OpenACCDeviceClause>) ==
+          Clauses.end())
+    return Diag(StartLoc, diag::err_acc_construct_one_clause_of)
+           << K
+           << GetListOfClauses({OpenACCClauseKind::Self,
+                                OpenACCClauseKind::Host,
+                                OpenACCClauseKind::Device});
 
   return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/true);
 }
diff --git a/clang/test/AST/ast-print-openacc-update-construct.cpp b/clang/test/AST/ast-print-openacc-update-construct.cpp
index 0d09c829929dc..f999e1037e904 100644
--- a/clang/test/AST/ast-print-openacc-update-construct.cpp
+++ b/clang/test/AST/ast-print-openacc-update-construct.cpp
@@ -3,38 +3,38 @@ void uses(bool cond) {
   int I;
   int *iPtr;
   int array[5];
-  // CHECK: #pragma acc update
-#pragma acc update
+  // CHECK: #pragma acc update self(I)
+#pragma acc update self(I)
 
-// CHECK: #pragma acc update if_present
-#pragma acc update if_present
-// CHECK: #pragma acc update if(cond)
-#pragma acc update if(cond)
+// CHECK: #pragma acc update self(I) if_present
+#pragma acc update self(I) if_present
+// CHECK: #pragma acc update self(I) if(cond)
+#pragma acc update self(I) if(cond)
 
-// CHECK: #pragma acc update async
-#pragma acc update async
-// CHECK: #pragma acc update async(*iPtr)
-#pragma acc update async(*iPtr)
-// CHECK: #pragma acc update async(I)
-#pragma acc update async(I)
+// CHECK: #pragma acc update self(I) async
+#pragma acc update self(I) async
+// CHECK: #pragma acc update self(I) async(*iPtr)
+#pragma acc update self(I) async(*iPtr)
+// CHECK: #pragma acc update self(I) async(I)
+#pragma acc update self(I) async(I)
 
-// CHECK: #pragma acc update wait(*iPtr, I) async
-#pragma acc update wait(*iPtr, I) async
+// CHECK: #pragma acc update self(I) wait(*iPtr, I) async
+#pragma acc update self(I) wait(*iPtr, I) async
 
-// CHECK: #pragma acc update wait(queues: *iPtr, I) async(*iPtr)
-#pragma acc update wait(queues:*iPtr, I) async(*iPtr)
+// CHECK: #pragma acc update self(I) wait(queues: *iPtr, I) async(*iPtr)
+#pragma acc update self(I) wait(queues:*iPtr, I) async(*iPtr)
 
-// CHECK: #pragma acc update wait(devnum: I : *iPtr, I) async(I)
-#pragma acc update wait(devnum:I:*iPtr, I) async(I)
+// CHECK: #pragma acc update self(I) wait(devnum: I : *iPtr, I) async(I)
+#pragma acc update self(I) wait(devnum:I:*iPtr, I) async(I)
 
-// CHECK: #pragma acc update wait(devnum: I : queues: *iPtr, I) if(I == array[I]) async(I)
-#pragma acc update wait(devnum:I:queues:*iPtr, I) if(I == array[I]) async(I)
+// CHECK: #pragma acc update self(I) wait(devnum: I : queues: *iPtr, I) if(I == array[I]) async(I)
+#pragma acc update self(I) wait(devnum:I:queues:*iPtr, I) if(I == array[I]) async(I)
 
-// CHECK: #pragma acc update device_type(I) dtype(H)
-#pragma acc update device_type(I) dtype(H)
+// CHECK: #pragma acc update self(I) device_type(I) dtype(H)
+#pragma acc update self(I) device_type(I) dtype(H)
 
-// CHECK: #pragma acc update device_type(J) dtype(K)
-#pragma acc update device_type(J) dtype(K)
+// CHECK: #pragma acc update self(I) device_type(J) dtype(K)
+#pragma acc update self(I) device_type(J) dtype(K)
 
 // CHECK: #pragma acc update self(I, iPtr, array, array[1], array[1:2])
 #pragma acc update self(I, iPtr, array, array[1], array[1:2])
diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c
index 930cc1e4c0353..b871624b6e943 100644
--- a/clang/test/ParserOpenACC/parse-clauses.c
+++ b/clang/test/ParserOpenACC/parse-clauses.c
@@ -344,7 +344,7 @@ void SelfUpdate() {
   struct Members s;
 
   // expected-error@+1{{expected '('}}
-#pragma acc update self
+#pragma acc update host(s) self
   for(int i = 0; i < 5;++i) {}
 
   // expected-error@+3{{use of undeclared identifier 'zero'}}
diff --git a/clang/test/ParserOpenACC/parse-constructs.c b/clang/test/ParserOpenACC/parse-constructs.c
index 9948e33ac94d1..886a912713c58 100644
--- a/clang/test/ParserOpenACC/parse-constructs.c
+++ b/clang/test/ParserOpenACC/parse-constructs.c
@@ -151,6 +151,7 @@ void func() {
   // expected-error@+1{{OpenACC 'set' construct must have at least one 'default_async', 'device_num', 'device_type' or 'if' clause}}
 #pragma acc set clause list
   for(;;){}
+  // expected-error@+2{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}}
   // expected-error@+1{{invalid OpenACC clause 'clause'}}
 #pragma acc update clause list
   for(;;){}
diff --git a/clang/test/SemaOpenACC/update-construct-ast.cpp b/clang/test/SemaOpenACC/update-construct-ast.cpp
index 5582bde7a64c2..4238f93d90a34 100644
--- a/clang/test/SemaOpenACC/update-construct-ast.cpp
+++ b/clang/test/SemaOpenACC/update-construct-ast.cpp
@@ -18,8 +18,10 @@ void NormalFunc() {
   // CHECK-LABEL: NormalFunc
   // CHECK-NEXT: CompoundStmt
 
-#pragma acc update if_present if (some_int() < some_long())
+#pragma acc update self(Global) if_present if (some_int() < some_long())
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int'
   // CHECK-NEXT: if_present clause
   // CHECK-NEXT: if clause
   // CHECK-NEXT: BinaryOperator{{.*}}'bool' '<'
@@ -31,15 +33,19 @@ void NormalFunc() {
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' 'long ()'
 
-#pragma acc update wait async device_type(A) dtype(B)
+#pragma acc update self(Global) wait async device_type(A) dtype(B)
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: <<<NULL>>>
   // CHECK-NEXT: async clause
   // CHECK-NEXT: device_type(A)
   // CHECK-NEXT: dtype(B)
-#pragma acc update wait(some_int(), some_long()) async(some_int())
+#pragma acc update self(Global) wait(some_int(), some_long()) async(some_int())
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: <<<NULL>>>
   // CHECK-NEXT: CallExpr{{.*}}'int'
@@ -52,8 +58,10 @@ void NormalFunc() {
   // CHECK-NEXT: CallExpr{{.*}}'int'
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' 'int ()'
-#pragma acc update wait(queues:some_int(), some_long())
+#pragma acc update self(Global) wait(queues:some_int(), some_long())
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: <<<NULL>>>
   // CHECK-NEXT: CallExpr{{.*}}'int'
@@ -62,8 +70,10 @@ void NormalFunc() {
   // CHECK-NEXT: CallExpr{{.*}}'long'
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' 'long ()'
-#pragma acc update wait(devnum: some_int() :some_int(), some_long())
+#pragma acc update self(Global) wait(devnum: some_int() :some_int(), some_long())
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: CallExpr{{.*}}'int'
   // CHECK-NEXT: ImplicitCastExpr
@@ -83,12 +93,12 @@ void NormalFunc() {
   // CHECK-NEXT: ArraySubscriptExpr{{.*}} 'short' lvalue
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}} 0
+  // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0
   // CHECK-NEXT: ArraySectionExpr
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}} 0
-  // CHECK-NEXT: IntegerLiteral{{.*}} 1
+  // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0
+  // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 1
 
 #pragma acc update host(Global, GlobalArray, GlobalArray[0], GlobalArray[0:1]) device(Global, GlobalArray, GlobalArray[0], GlobalArray[0:1])
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
@@ -98,24 +108,24 @@ void NormalFunc() {
   // CHECK-NEXT: ArraySubscriptExpr{{.*}} 'short' lvalue
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}} 0
+  // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0
   // CHECK-NEXT: ArraySectionExpr
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}} 0
-  // CHECK-NEXT: IntegerLiteral{{.*}} 1
+  // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0
+  // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 1
   // CHECK-NEXT: device clause
   // CHECK-NEXT: DeclRefExpr{{.*}}'Global' 'int'
   // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
   // CHECK-NEXT: ArraySubscriptExpr{{.*}} 'short' lvalue
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}} 0
+  // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0
   // CHECK-NEXT: ArraySectionExpr
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}}'GlobalArray' 'short[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}} 0
-  // CHECK-NEXT: IntegerLiteral{{.*}} 1
+  // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 0
+  // CHECK-NEXT: IntegerLiteral{{.*}} 'int' 1
 }
 
 template<typename T>
@@ -126,8 +136,10 @@ void TemplFunc(T t) {
   // CHECK-NEXT: ParmVarDecl{{.*}} t 'T'
   // CHECK-NEXT: CompoundStmt
 
-#pragma acc update if_present if (T::value < t)
+#pragma acc update self(t) if_present if (T::value < t)
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T'
   // CHECK-NEXT: if_present clause
   // CHECK-NEXT: if clause
   // CHECK-NEXT: BinaryOperator{{.*}}'<dependent type>' '<'
@@ -135,15 +147,19 @@ void TemplFunc(T t) {
   // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T'
   // CHECK-NEXT: DeclRefExpr{{.*}}'t' 'T'
 
-#pragma acc update wait async device_type(T) dtype(U)
+#pragma acc update self(t) wait async device_type(T) dtype(U)
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: <<<NULL>>>
   // CHECK-NEXT: async clause
   // CHECK-NEXT: device_type(T)
   // CHECK-NEXT: dtype(U)
-#pragma acc update wait(T::value, t) async(T::value)
+#pragma acc update self(t) wait(T::value, t) async(T::value)
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: <<<NULL>>>
   // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}}'<dependent type>'
@@ -152,8 +168,10 @@ void TemplFunc(T t) {
   // CHECK-NEXT: async clause
   // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}}'<dependent type>'
   // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T'
-#pragma acc update wait(queues:T::value, t) async(t)
+#pragma acc update self(t) wait(queues:T::value, t) async(t)
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: <<<NULL>>>
   // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}}'<dependent type>'
@@ -161,8 +179,10 @@ void TemplFunc(T t) {
   // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T'
   // CHECK-NEXT: async clause
   // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T'
-#pragma acc update wait(devnum: T::value:t, T::value)
+#pragma acc update self(t) wait(devnum: T::value:t, T::value)
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'T'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}}'<dependent type>'
   // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T'
@@ -184,11 +204,11 @@ void TemplFunc(T t) {
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
   // CHECK-NEXT: ArraySubscriptExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
   // CHECK-NEXT: ArraySectionExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
-  // CHECK-NEXT: IntegerLiteral{{.*}}1
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1
 
 #pragma acc update host(Local, LocalArray, LocalArray[0], LocalArray[0:1]) device(Local, LocalArray, LocalArray[0], LocalArray[0:1])
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
@@ -197,21 +217,21 @@ void TemplFunc(T t) {
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
   // CHECK-NEXT: ArraySubscriptExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
   // CHECK-NEXT: ArraySectionExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
-  // CHECK-NEXT: IntegerLiteral{{.*}}1
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1
   // CHECK-NEXT: device clause
   // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(T::value)'
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
   // CHECK-NEXT: ArraySubscriptExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
   // CHECK-NEXT: ArraySectionExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(T::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
-  // CHECK-NEXT: IntegerLiteral{{.*}}1
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1
 
   // Instantiation:
   // CHECK-NEXT: FunctionDecl{{.*}} TemplFunc 'void (SomeStruct)' implicit_instantiation
@@ -222,6 +242,8 @@ void TemplFunc(T t) {
   // CHECK-NEXT: CompoundStmt
 
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'SomeStruct'
   // CHECK-NEXT: if_present clause
   // CHECK-NEXT: if clause
   // CHECK-NEXT: BinaryOperator{{.*}}'bool' '<'
@@ -234,6 +256,8 @@ void TemplFunc(T t) {
   // CHECK-NEXT: DeclRefExpr{{.*}}'t' 'SomeStruct'
 
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'SomeStruct'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: <<<NULL>>>
   // CHECK-NEXT: async clause
@@ -241,6 +265,8 @@ void TemplFunc(T t) {
   // CHECK-NEXT: dtype(U)
 
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'SomeStruct'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: <<<NULL>>>
   // CHECK-NEXT: ImplicitCastExpr{{.*}}'unsigned int'
@@ -256,6 +282,8 @@ void TemplFunc(T t) {
   // CHECK-NEXT: NestedNameSpecifier TypeSpec 'SomeStruct'
 
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'SomeStruct'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: <<<NULL>>>
   // CHECK-NEXT: ImplicitCastExpr{{.*}}'unsigned int'
@@ -272,6 +300,8 @@ void TemplFunc(T t) {
   // CHECK-NEXT: DeclRefExpr{{.*}}'t' 'SomeStruct'
 
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
+  // CHECK-NEXT: self clause
+  // CHECK-NEXT: DeclRefExpr{{.*}} 't' 'SomeStruct'
   // CHECK-NEXT: wait clause
   // CHECK-NEXT: ImplicitCastExpr{{.*}}'unsigned int'
   // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const unsigned int'
@@ -299,12 +329,12 @@ void TemplFunc(T t) {
   // CHECK-NEXT: ArraySubscriptExpr
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
   // CHECK-NEXT: ArraySectionExpr
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
-  // CHECK-NEXT: IntegerLiteral{{.*}}1
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1
 
   // CHECK-NEXT: OpenACCUpdateConstruct{{.*}}update
   // CHECK-NEXT: host clause
@@ -313,24 +343,24 @@ void TemplFunc(T t) {
   // CHECK-NEXT: ArraySubscriptExpr
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
   // CHECK-NEXT: ArraySectionExpr
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
-  // CHECK-NEXT: IntegerLiteral{{.*}}1
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1
   // CHECK-NEXT: device clause
   // CHECK-NEXT: DeclRefExpr{{.*}} 'Local' 'decltype(SomeStruct::value)':'const unsigned int'
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
   // CHECK-NEXT: ArraySubscriptExpr
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
   // CHECK-NEXT: ArraySectionExpr
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr{{.*}} 'LocalArray' 'decltype(SomeStruct::value)[5]'
-  // CHECK-NEXT: IntegerLiteral{{.*}}0
-  // CHECK-NEXT: IntegerLiteral{{.*}}1
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 0
+  // CHECK-NEXT: IntegerLiteral{{.*}}'int' 1
 }
 
 struct SomeStruct{
diff --git a/clang/test/SemaOpenACC/update-construct.cpp b/clang/test/SemaOpenACC/update-construct.cpp
index 8b65da69e42b9..30c079c8befd4 100644
--- a/clang/test/SemaOpenACC/update-construct.cpp
+++ b/clang/test/SemaOpenACC/update-construct.cpp
@@ -21,36 +21,38 @@ void uses() {
 #pragma acc update self(Var) device_type(I) if_present
   // expected-error@+2{{OpenACC clause 'self' may not follow a 'device_type' clause in a 'update' construct}}
   // expected-note@+1{{previous clause is here}}
-#pragma acc update device_type(I) self(Var)
+#pragma acc update self(Var) device_type(I) self(Var)
   // expected-error@+2{{OpenACC clause 'host' may not follow a 'device_type' clause in a 'update' construct}}
   // expected-note@+1{{previous clause is here}}
-#pragma acc update device_type(I) host(Var)
+#pragma acc update self(Var) device_type(I) host(Var)
   // expected-error@+2{{OpenACC clause 'device' may not follow a 'device_type' clause in a 'update' construct}}
   // expected-note@+1{{previous clause is here}}
-#pragma acc update device_type(I) device(Var)
+#pragma acc update self(Var) device_type(I) device(Var)
   // These 2 are OK.
 #pragma acc update self(Var) device_type(I) async
 #pragma acc update self(Var) device_type(I) wait
   // Unless otherwise specified, we assume 'device_type' can happen after itself.
 #pragma acc update self(Var) device_type(I) device_type(I)
 
-  // TODO: OpenACC: These should diagnose because there isn't at least 1 of
-  // 'self', 'host', or 'device'.
+  // These diagnose because there isn't at least 1 of 'self', 'host', or
+  // 'device'.
+  // expected-error@+1{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}}
 #pragma acc update async
+  // expected-error@+1{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}}
 #pragma acc update wait
+  // expected-error@+1{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}}
 #pragma acc update device_type(I)
+  // expected-error@+1{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}}
 #pragma acc update if(true)
+  // expected-error@+1{{OpenACC 'update' construct must have at least one 'self', 'host' or 'device' clause}}
 #pragma acc update if_present
 
   // expected-error@+1{{value of type 'struct NotConvertible' is not contextually convertible to 'bool'}}
-#pragma acc update if (NC) device_type(I)
+#pragma acc update self(Var) if (NC) device_type(I)
 
   // expected-error@+2{{OpenACC 'if' clause cannot appear more than once on a 'update' directive}}
   // expected-note@+1{{previous clause is here}}
-#pragma acc update if(true) if (false)
-
-  // TODO: OpenACC: There is restrictions on the contents of a 'varlist', so
-  // those should be checked here too.
+#pragma acc update self(Var) if(true) if (false)
 
   // Cannot be the body of an 'if', 'while', 'do', 'switch', or
   // 'label'.
@@ -80,34 +82,34 @@ void uses() {
 #pragma acc update device(Var)
 
   // Checking for 'async', which requires an 'int' expression.
-#pragma acc update async
+#pragma acc update self(Var) async
 
-#pragma acc update async(getI())
+#pragma acc update self(Var) async(getI())
   // expected-error@+2{{expected ')'}}
   // expected-note@+1{{to match this '('}}
-#pragma acc update async(getI(), getI())
+#pragma acc update self(Var) async(getI(), getI())
   // expected-error@+2{{OpenACC 'async' clause cannot appear more than once on a 'update' directive}}
   // expected-note@+1{{previous clause is here}}
-#pragma acc update async(getI()) async(getI())
+#pragma acc update self(Var) async(getI()) async(getI())
   // expected-error@+1{{OpenACC clause 'async' requires expression of integer type ('struct NotConvertible' invalid)}}
-#pragma acc update async(NC)
+#pragma acc update self(Var) async(NC)
 
   // Checking for 'wait', which has a complicated set arguments.
-#pragma acc update wait
-#pragma acc update wait()
-#pragma acc update wait(getI(), getI())
-#pragma acc update wait(devnum: getI():  getI())
-#pragma acc update wait(devnum: getI(): queues: getI(), getI())
+#pragma acc update self(Var) wait
+#pragma acc update self(Var) wait()
+#pragma acc update self(Var) wait(getI(), getI())
+#pragma acc update self(Var) wait(devnum: getI():  getI())
+#pragma acc update self(Var) wait(devnum: getI(): queues: getI(), getI())
   // expected-error@+1{{OpenACC clause 'wait' requires expression of integer type ('struct NotConvertible' invalid)}}
-#pragma acc update wait(devnum:NC : 5)
+#pragma acc update self(Var) wait(devnum:NC : 5)
   // expected-error@+1{{OpenACC clause 'wait' requires expression of integer type ('struct NotConvertible' invalid)}}
-#pragma acc update wait(devnum:5 : NC)
+#pragma acc update self(Var) wait(devnum:5 : NC)
 
     int arr[5];
   // expected-error@+3{{OpenACC clause 'wait' requires expression of integer type ('int[5]' invalid)}}
   // expected-error@+2{{OpenACC clause 'wait' requires expression of integer type ('int[5]' invalid)}}
   // expected-error@+1{{OpenACC clause 'wait' requires expression of integer type ('struct NotConvertible' invalid)}}
-#pragma acc update wait(devnum:arr : queues: arr, NC, 5)
+#pragma acc update self(Var) wait(devnum:arr : queues: arr, NC, 5)
 }
 
 struct SomeS {

From 8ac6a6b81629840d2456e32ec4651440523718dc Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 9 Jan 2025 18:33:42 +0100
Subject: [PATCH 16/79] Reorder fields so InitDeferredFlag is destroyed last

TimerGroup's dtor accesses this field.

once_flag has a trivial destructor so it doesn't really matter, but msan
checks that it's not accessed after destruction.
---
 llvm/lib/Support/Timer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp
index ccd78ad86e91c..3f0926ae0f3cb 100644
--- a/llvm/lib/Support/Timer.cpp
+++ b/llvm/lib/Support/Timer.cpp
@@ -507,11 +507,11 @@ class llvm::TimerGlobals {
   // Order of these members and initialization below is important. For example
   // the DefaultTimerGroup uses the TimerLock. Most of these also depend on the
   // options above.
+  std::once_flag InitDeferredFlag;
   std::unique_ptr<SignpostEmitter> SignpostsPtr;
   std::unique_ptr<sys::SmartMutex<true>> TimerLockPtr;
   std::unique_ptr<TimerGroup> DefaultTimerGroupPtr;
   std::unique_ptr<Name2PairMap> NamedGroupedTimersPtr;
-  std::once_flag InitDeferredFlag;
   TimerGlobals &initDeferred() {
     std::call_once(InitDeferredFlag, [this]() {
       SignpostsPtr = std::make_unique<SignpostEmitter>();

From 43e663d0fd82e226203b051d31fd3ce5388ca984 Mon Sep 17 00:00:00 2001
From: fahadnayyar <30953967+fahadnayyar@users.noreply.github.com>
Date: Thu, 9 Jan 2025 09:42:24 -0800
Subject: [PATCH 17/79] [APINotes] [NFC] Add tests for
 SWIFT_RETURNS_(UN)RETAINED for ObjC APIs (#122167)

Adding test case to verify that SwiftReturnOwnership works correctly for
ObjC functions and methods as well.

rdar://142504115
---
 .../SwiftReturnOwnershipForObjC.apinotes        | 17 +++++++++++++++++
 .../Headers/SwiftReturnOwnershipForObjC.h       |  9 +++++++++
 .../APINotes/Inputs/Headers/module.modulemap    |  4 ++++
 clang/test/APINotes/swift-return-ownership.m    | 11 +++++++++++
 4 files changed, 41 insertions(+)
 create mode 100644 clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.apinotes
 create mode 100644 clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.h
 create mode 100644 clang/test/APINotes/swift-return-ownership.m

diff --git a/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.apinotes b/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.apinotes
new file mode 100644
index 0000000000000..4b749b40e026f
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.apinotes
@@ -0,0 +1,17 @@
+---
+Name: SwiftImportAsForObjC
+Classes:
+  - Name:            MethodTest
+    Methods:
+      - Selector:        getUnowned
+        MethodKind:      Instance
+        SwiftReturnOwnership: unretained
+      - Selector:        getOwned
+        MethodKind:      Instance
+        SwiftReturnOwnership: retained
+
+Functions:
+  - Name:            getObjCUnowned
+    SwiftReturnOwnership: unretained
+  - Name:            getObjCOwned
+    SwiftReturnOwnership: retained
diff --git a/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.h b/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.h
new file mode 100644
index 0000000000000..83e2535387caa
--- /dev/null
+++ b/clang/test/APINotes/Inputs/Headers/SwiftReturnOwnershipForObjC.h
@@ -0,0 +1,9 @@
+struct RefCountedType { int value; };
+
+@interface MethodTest
+- (struct RefCountedType *)getUnowned;
+- (struct RefCountedType *)getOwned;
+@end
+
+struct RefCountedType * getObjCUnowned(void);
+struct RefCountedType * getObjCOwned(void);
diff --git a/clang/test/APINotes/Inputs/Headers/module.modulemap b/clang/test/APINotes/Inputs/Headers/module.modulemap
index 31f7d36356d83..bedb7d505f794 100644
--- a/clang/test/APINotes/Inputs/Headers/module.modulemap
+++ b/clang/test/APINotes/Inputs/Headers/module.modulemap
@@ -57,3 +57,7 @@ module Templates {
 module SwiftImportAs {
   header "SwiftImportAs.h"
 }
+
+module SwiftReturnOwnershipForObjC {
+  header "SwiftReturnOwnershipForObjC.h"
+}
diff --git a/clang/test/APINotes/swift-return-ownership.m b/clang/test/APINotes/swift-return-ownership.m
new file mode 100644
index 0000000000000..b1221d159e610
--- /dev/null
+++ b/clang/test/APINotes/swift-return-ownership.m
@@ -0,0 +1,11 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s
+// RUN: %clang_cc1 -ast-print %t/ModulesCache/SwiftReturnOwnershipForObjC.pcm | FileCheck %s
+#import <SwiftReturnOwnershipForObjC.h>
+
+// CHECK: @interface MethodTest
+// CHECK: - (struct RefCountedType *)getUnowned __attribute__((swift_attr("returns_unretained")));
+// CHECK: - (struct RefCountedType *)getOwned __attribute__((swift_attr("returns_retained")));
+// CHECK: @end
+// CHECK: __attribute__((swift_attr("returns_unretained"))) struct RefCountedType *getObjCUnowned(void);
+// CHECK: __attribute__((swift_attr("returns_retained"))) struct RefCountedType *getObjCOwned(void);

From 50ca2eff5fb2c239a373790bcbe384f629eb077a Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <rampitec@users.noreply.github.com>
Date: Thu, 9 Jan 2025 09:56:44 -0800
Subject: [PATCH 18/79] Drop emitQuaternaryBuiltin from clang (#122169)

It was superceeded by the emitBuiltinWithOneOverloadedType() some
time ago.
---
 clang/lib/CodeGen/CGBuiltin.cpp | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 573be932f8b1a..ca03fb665d423 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -774,18 +774,6 @@ static Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF,
   return CGF.Builder.CreateCall(F, Args, Name);
 }
 
-// Emit an intrinsic that has 4 operands of the same type as its result.
-static Value *emitQuaternaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
-                                    unsigned IntrinsicID) {
-  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
-  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
-  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
-  llvm::Value *Src3 = CGF.EmitScalarExpr(E->getArg(3));
-
-  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
-  return CGF.Builder.CreateCall(F, {Src0, Src1, Src2, Src3});
-}
-
 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
@@ -20443,7 +20431,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   }
   case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
   case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
-    return emitQuaternaryBuiltin(*this, E, Intrinsic::amdgcn_bitop3);
+    return emitBuiltinWithOneOverloadedType<4>(*this, E,
+                                               Intrinsic::amdgcn_bitop3);
   case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
     return emitBuiltinWithOneOverloadedType<4>(
         *this, E, Intrinsic::amdgcn_make_buffer_rsrc);

From 69b54c1a05c0c63ee28de1279b3a689b7f026e94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=98=D0=B7?=
 =?UTF-8?q?=D0=B2=D0=BE=D0=BB=D0=BE=D0=B2?= <dmitriy@izvolov.ru>
Date: Thu, 9 Jan 2025 21:02:35 +0300
Subject: [PATCH 19/79] [libcxx][algorithm] Optimize std::stable_sort via radix
 sort algorithm (#104683)

The radix sort (LSD) algorithm allows to speed up std::stable_sort
dramatically in case we sort integers.
The speed up varies from a relatively small to x10 times, depending on
type of sorted elements and the initial state of the sorted array.

```
Running ./libcxx/test/benchmarks/stable_sort.bench.out
Run on (12 X 2600 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB
  L1 Instruction 32 KiB
  L2 Unified 256 KiB (x6)
  L3 Unified 12288 KiB
Load Average: 3.48, 3.38, 3.08
---------------------------------------------------------------------------
Benchmark                                               After        Before
---------------------------------------------------------------------------
BM_StableSort_int8_Random_1                           3.39 ns       3.58 ns
BM_StableSort_int8_Random_4                           21.1 ns       21.9 ns
BM_StableSort_int8_Random_16                           142 ns        147 ns
BM_StableSort_int8_Random_64                           893 ns        903 ns
BM_StableSort_int8_Random_256                          409 ns       5810 ns
BM_StableSort_int8_Random_1024                        1235 ns      29973 ns
BM_StableSort_int8_Random_4096                        4410 ns     141880 ns
BM_StableSort_int8_Random_16384                      18044 ns     620540 ns
BM_StableSort_int8_Random_65536                     144030 ns    2592013 ns
BM_StableSort_int8_Random_262144                    858350 ns   10935814 ns
BM_StableSort_int8_Random_524288                   2929988 ns   27060729 ns
BM_StableSort_int8_Random_1048576                  6058292 ns   49622720 ns
BM_StableSort_int8_Ascending_1                        3.42 ns       3.92 ns
BM_StableSort_int8_Ascending_4                        5.86 ns       8.08 ns
BM_StableSort_int8_Ascending_16                       10.6 ns       12.0 ns
BM_StableSort_int8_Ascending_64                       28.9 ns       30.6 ns
BM_StableSort_int8_Ascending_256                       415 ns        391 ns
BM_StableSort_int8_Ascending_1024                     1666 ns       2309 ns
BM_StableSort_int8_Ascending_4096                     7748 ns      12269 ns
BM_StableSort_int8_Ascending_16384                   40588 ns      60181 ns
BM_StableSort_int8_Ascending_65536                  178843 ns     298221 ns
BM_StableSort_int8_Ascending_262144                 919959 ns    1402692 ns
BM_StableSort_int8_Ascending_524288                2397397 ns    3036984 ns
BM_StableSort_int8_Ascending_1048576               5080043 ns    7218581 ns
BM_StableSort_int8_Descending_1                       3.44 ns       3.53 ns
BM_StableSort_int8_Descending_4                       7.94 ns       8.29 ns
BM_StableSort_int8_Descending_16                      59.6 ns       57.7 ns
BM_StableSort_int8_Descending_64                      1051 ns       1027 ns
BM_StableSort_int8_Descending_256                      422 ns       4718 ns
BM_StableSort_int8_Descending_1024                    1676 ns      21044 ns
BM_StableSort_int8_Descending_4096                    7766 ns      64827 ns
BM_StableSort_int8_Descending_16384                  40230 ns      93981 ns
BM_StableSort_int8_Descending_65536                 190978 ns     421151 ns
BM_StableSort_int8_Descending_262144               1055141 ns    1918927 ns
BM_StableSort_int8_Descending_524288               2875115 ns    3809153 ns
BM_StableSort_int8_Descending_1048576              5854135 ns    8713690 ns
BM_StableSort_int8_SingleElement_1                    3.52 ns       3.46 ns
BM_StableSort_int8_SingleElement_4                    6.25 ns       5.79 ns
BM_StableSort_int8_SingleElement_16                   10.7 ns       11.4 ns
BM_StableSort_int8_SingleElement_64                   29.3 ns       30.3 ns
BM_StableSort_int8_SingleElement_256                   858 ns        380 ns
BM_StableSort_int8_SingleElement_1024                 3036 ns       2231 ns
BM_StableSort_int8_SingleElement_4096                11580 ns      11866 ns
BM_StableSort_int8_SingleElement_16384               44956 ns      59621 ns
BM_StableSort_int8_SingleElement_65536              182006 ns     297853 ns
BM_StableSort_int8_SingleElement_262144             962181 ns    1432857 ns
BM_StableSort_int8_SingleElement_524288            2256687 ns    2975707 ns
BM_StableSort_int8_SingleElement_1048576           4522556 ns    6949948 ns
BM_StableSort_int8_PipeOrgan_1                        3.26 ns       3.64 ns
BM_StableSort_int8_PipeOrgan_4                        6.21 ns       6.58 ns
BM_StableSort_int8_PipeOrgan_16                       23.7 ns       25.4 ns
BM_StableSort_int8_PipeOrgan_64                        250 ns        248 ns
BM_StableSort_int8_PipeOrgan_256                       414 ns       2498 ns
BM_StableSort_int8_PipeOrgan_1024                     1697 ns      10946 ns
BM_StableSort_int8_PipeOrgan_4096                     7840 ns      37238 ns
BM_StableSort_int8_PipeOrgan_16384                   41402 ns      74805 ns
BM_StableSort_int8_PipeOrgan_65536                  180107 ns     357891 ns
BM_StableSort_int8_PipeOrgan_262144                 988273 ns    1647296 ns
BM_StableSort_int8_PipeOrgan_524288                2547374 ns    3245991 ns
BM_StableSort_int8_PipeOrgan_1048576               5128783 ns    7342444 ns
BM_StableSort_int8_QuickSortAdversary_1               3.14 ns       4.01 ns
BM_StableSort_int8_QuickSortAdversary_4               6.05 ns       7.02 ns
BM_StableSort_int8_QuickSortAdversary_16              10.5 ns       11.9 ns
BM_StableSort_int8_QuickSortAdversary_64               520 ns        516 ns
BM_StableSort_int8_QuickSortAdversary_256              920 ns        386 ns
BM_StableSort_int8_QuickSortAdversary_1024            3083 ns       2299 ns
BM_StableSort_int8_QuickSortAdversary_4096           11659 ns      12295 ns
BM_StableSort_int8_QuickSortAdversary_16384          45721 ns      60931 ns
BM_StableSort_int8_QuickSortAdversary_65536         186334 ns     295423 ns
BM_StableSort_int8_QuickSortAdversary_262144        946262 ns    1399973 ns
BM_StableSort_int8_QuickSortAdversary_524288       2282004 ns    2832266 ns
BM_StableSort_int8_QuickSortAdversary_1048576      4691123 ns    6963253 ns
BM_StableSort_uint8_Random_1                          3.11 ns       3.44 ns
BM_StableSort_uint8_Random_4                          21.9 ns       23.1 ns
BM_StableSort_uint8_Random_16                          154 ns        171 ns
BM_StableSort_uint8_Random_64                         1000 ns       1051 ns
BM_StableSort_uint8_Random_256                         402 ns       6498 ns
BM_StableSort_uint8_Random_1024                       1176 ns      35310 ns
BM_StableSort_uint8_Random_4096                       4415 ns     164087 ns
BM_StableSort_uint8_Random_16384                     17849 ns     686769 ns
BM_StableSort_uint8_Random_65536                    146109 ns    2932051 ns
BM_StableSort_uint8_Random_262144                   876710 ns   12163988 ns
BM_StableSort_uint8_Random_524288                  2858089 ns   26458830 ns
BM_StableSort_uint8_Random_1048576                 5766942 ns   54836214 ns
BM_StableSort_uint8_Ascending_1                       3.11 ns       3.43 ns
BM_StableSort_uint8_Ascending_4                       6.18 ns       7.24 ns
BM_StableSort_uint8_Ascending_16                      14.5 ns       17.0 ns
BM_StableSort_uint8_Ascending_64                      50.7 ns       59.2 ns
BM_StableSort_uint8_Ascending_256                      395 ns        536 ns
BM_StableSort_uint8_Ascending_1024                    1752 ns       2956 ns
BM_StableSort_uint8_Ascending_4096                    7785 ns      15146 ns
BM_StableSort_uint8_Ascending_16384                  41442 ns      74136 ns
BM_StableSort_uint8_Ascending_65536                 180879 ns     354261 ns
BM_StableSort_uint8_Ascending_262144                945880 ns    1674256 ns
BM_StableSort_uint8_Ascending_524288               2287832 ns    3138581 ns
BM_StableSort_uint8_Ascending_1048576              4630290 ns    7296278 ns
BM_StableSort_uint8_Descending_1                      3.19 ns       3.63 ns
BM_StableSort_uint8_Descending_4                      9.60 ns       11.5 ns
BM_StableSort_uint8_Descending_16                     78.3 ns       86.0 ns
BM_StableSort_uint8_Descending_64                     1265 ns       1308 ns
BM_StableSort_uint8_Descending_256                     395 ns       6556 ns
BM_StableSort_uint8_Descending_1024                   1712 ns      24669 ns
BM_StableSort_uint8_Descending_4096                   7748 ns      83407 ns
BM_StableSort_uint8_Descending_16384                 40779 ns     104043 ns
BM_StableSort_uint8_Descending_65536                181560 ns     467680 ns
BM_StableSort_uint8_Descending_262144              1146627 ns    2102769 ns
BM_StableSort_uint8_Descending_524288              2874096 ns    4572229 ns
BM_StableSort_uint8_Descending_1048576             5873195 ns   10170663 ns
BM_StableSort_uint8_SingleElement_1                   3.28 ns       3.58 ns
BM_StableSort_uint8_SingleElement_4                   6.44 ns       7.40 ns
BM_StableSort_uint8_SingleElement_16                  14.9 ns       16.4 ns
BM_StableSort_uint8_SingleElement_64                  51.2 ns       52.9 ns
BM_StableSort_uint8_SingleElement_256                  876 ns        490 ns
BM_StableSort_uint8_SingleElement_1024                3041 ns       2750 ns
BM_StableSort_uint8_SingleElement_4096               11947 ns      14326 ns
BM_StableSort_uint8_SingleElement_16384              46669 ns      69984 ns
BM_StableSort_uint8_SingleElement_65536             197903 ns     328961 ns
BM_StableSort_uint8_SingleElement_262144           1031466 ns    1551436 ns
BM_StableSort_uint8_SingleElement_524288           2447672 ns    3049553 ns
BM_StableSort_uint8_SingleElement_1048576          4793087 ns    7615245 ns
BM_StableSort_uint8_PipeOrgan_1                       3.38 ns       3.56 ns
BM_StableSort_uint8_PipeOrgan_4                       7.16 ns       8.70 ns
BM_StableSort_uint8_PipeOrgan_16                      31.7 ns       35.3 ns
BM_StableSort_uint8_PipeOrgan_64                       326 ns        366 ns
BM_StableSort_uint8_PipeOrgan_256                      409 ns       2942 ns
BM_StableSort_uint8_PipeOrgan_1024                    1994 ns      12571 ns
BM_StableSort_uint8_PipeOrgan_4096                    8086 ns      46278 ns
BM_StableSort_uint8_PipeOrgan_16384                  41749 ns      79813 ns
BM_StableSort_uint8_PipeOrgan_65536                 180697 ns     375120 ns
BM_StableSort_uint8_PipeOrgan_262144               1004899 ns    1676143 ns
BM_StableSort_uint8_PipeOrgan_524288               2456081 ns    3333949 ns
BM_StableSort_uint8_PipeOrgan_1048576              5030857 ns    7591303 ns
BM_StableSort_uint8_QuickSortAdversary_1              3.12 ns       3.46 ns
BM_StableSort_uint8_QuickSortAdversary_4              7.25 ns       6.83 ns
BM_StableSort_uint8_QuickSortAdversary_16             14.6 ns       16.2 ns
BM_StableSort_uint8_QuickSortAdversary_64              650 ns        665 ns
BM_StableSort_uint8_QuickSortAdversary_256             395 ns       2982 ns
BM_StableSort_uint8_QuickSortAdversary_1024           3125 ns       2583 ns
BM_StableSort_uint8_QuickSortAdversary_4096          11797 ns      13929 ns
BM_StableSort_uint8_QuickSortAdversary_16384         45803 ns      66513 ns
BM_StableSort_uint8_QuickSortAdversary_65536        190745 ns     313467 ns
BM_StableSort_uint8_QuickSortAdversary_262144       974646 ns    1469014 ns
BM_StableSort_uint8_QuickSortAdversary_524288      2317553 ns    3022065 ns
BM_StableSort_uint8_QuickSortAdversary_1048576     4898703 ns    6854079 ns
BM_StableSort_int16_Random_1                          3.94 ns       3.49 ns
BM_StableSort_int16_Random_4                          20.8 ns       23.2 ns
BM_StableSort_int16_Random_16                          133 ns        163 ns
BM_StableSort_int16_Random_64                          903 ns        953 ns
BM_StableSort_int16_Random_256                        5638 ns       6258 ns
BM_StableSort_int16_Random_1024                       3056 ns      34587 ns
BM_StableSort_int16_Random_4096                      10596 ns     168397 ns
BM_StableSort_int16_Random_16384                     49908 ns     753031 ns
BM_StableSort_int16_Random_65536                    444605 ns    3838368 ns
BM_StableSort_int16_Random_262144                  2419345 ns   15657285 ns
BM_StableSort_int16_Random_524288                  7984040 ns   32726933 ns
BM_StableSort_int16_Random_1048576                16092424 ns   67999766 ns
BM_StableSort_int16_Ascending_1                       3.40 ns       3.43 ns
BM_StableSort_int16_Ascending_4                       5.45 ns       5.79 ns
BM_StableSort_int16_Ascending_16                      12.0 ns       15.3 ns
BM_StableSort_int16_Ascending_64                      39.6 ns       52.6 ns
BM_StableSort_int16_Ascending_256                      470 ns        550 ns
BM_StableSort_int16_Ascending_1024                    1686 ns       2707 ns
BM_StableSort_int16_Ascending_4096                    5676 ns      14165 ns
BM_StableSort_int16_Ascending_16384                  21413 ns      69483 ns
BM_StableSort_int16_Ascending_65536                  88010 ns     334466 ns
BM_StableSort_int16_Ascending_262144                567239 ns    1570620 ns
BM_StableSort_int16_Ascending_524288               1553063 ns    3424666 ns
BM_StableSort_int16_Ascending_1048576              3145577 ns    8499649 ns
BM_StableSort_int16_Descending_1                      3.22 ns       3.54 ns
BM_StableSort_int16_Descending_4                      6.85 ns       10.2 ns
BM_StableSort_int16_Descending_16                     62.7 ns       62.2 ns
BM_StableSort_int16_Descending_64                     1138 ns       1036 ns
BM_StableSort_int16_Descending_256                    5541 ns       4696 ns
BM_StableSort_int16_Descending_1024                   3046 ns      19577 ns
BM_StableSort_int16_Descending_4096                  10962 ns      79149 ns
BM_StableSort_int16_Descending_16384                 58182 ns     327709 ns
BM_StableSort_int16_Descending_65536                447025 ns    1424896 ns
BM_StableSort_int16_Descending_262144              1104973 ns    5921903 ns
BM_StableSort_int16_Descending_524288              2547840 ns   17956789 ns
BM_StableSort_int16_Descending_1048576             5093555 ns   17044318 ns
BM_StableSort_int16_SingleElement_1                   3.56 ns       3.96 ns
BM_StableSort_int16_SingleElement_4                   5.75 ns       6.72 ns
BM_StableSort_int16_SingleElement_16                  12.4 ns       16.1 ns
BM_StableSort_int16_SingleElement_64                  36.9 ns       54.4 ns
BM_StableSort_int16_SingleElement_256                  473 ns        557 ns
BM_StableSort_int16_SingleElement_1024                1828 ns       2826 ns
BM_StableSort_int16_SingleElement_4096                6239 ns      14252 ns
BM_StableSort_int16_SingleElement_16384              23695 ns      70369 ns
BM_StableSort_int16_SingleElement_65536              93281 ns     361641 ns
BM_StableSort_int16_SingleElement_262144            599078 ns    1640216 ns
BM_StableSort_int16_SingleElement_524288           1659678 ns    3343087 ns
BM_StableSort_int16_SingleElement_1048576          3184033 ns    7770271 ns
BM_StableSort_int16_PipeOrgan_1                       3.75 ns       3.76 ns
BM_StableSort_int16_PipeOrgan_4                       5.94 ns       7.74 ns
BM_StableSort_int16_PipeOrgan_16                      26.7 ns       25.9 ns
BM_StableSort_int16_PipeOrgan_64                       300 ns        263 ns
BM_StableSort_int16_PipeOrgan_256                     2769 ns       2760 ns
BM_StableSort_int16_PipeOrgan_1024                    2996 ns      10544 ns
BM_StableSort_int16_PipeOrgan_4096                   11641 ns      44750 ns
BM_StableSort_int16_PipeOrgan_16384                  57224 ns     200464 ns
BM_StableSort_int16_PipeOrgan_65536                 416873 ns     887631 ns
BM_StableSort_int16_PipeOrgan_262144                843264 ns    3588669 ns
BM_StableSort_int16_PipeOrgan_524288               2027741 ns   11056924 ns
BM_StableSort_int16_PipeOrgan_1048576              4223773 ns   13261276 ns
BM_StableSort_int16_QuickSortAdversary_1              3.83 ns       3.68 ns
BM_StableSort_int16_QuickSortAdversary_4              5.55 ns       6.93 ns
BM_StableSort_int16_QuickSortAdversary_16             12.3 ns       15.2 ns
BM_StableSort_int16_QuickSortAdversary_64              646 ns        632 ns
BM_StableSort_int16_QuickSortAdversary_256            2751 ns       2542 ns
BM_StableSort_int16_QuickSortAdversary_1024           3028 ns      16901 ns
BM_StableSort_int16_QuickSortAdversary_4096          10862 ns      80222 ns
BM_StableSort_int16_QuickSortAdversary_16384         57753 ns     317281 ns
BM_StableSort_int16_QuickSortAdversary_65536         94064 ns     328502 ns
BM_StableSort_int16_QuickSortAdversary_262144       557796 ns    1613208 ns
BM_StableSort_int16_QuickSortAdversary_524288      1518451 ns    3479740 ns
BM_StableSort_int16_QuickSortAdversary_1048576     3165129 ns    7655880 ns
BM_StableSort_uint16_Random_1                         3.26 ns       3.44 ns
BM_StableSort_uint16_Random_4                         21.1 ns       22.2 ns
BM_StableSort_uint16_Random_16                         157 ns        156 ns
BM_StableSort_uint16_Random_64                         955 ns        947 ns
BM_StableSort_uint16_Random_256                       5886 ns       6097 ns
BM_StableSort_uint16_Random_1024                      2787 ns      30776 ns
BM_StableSort_uint16_Random_4096                      9973 ns     155652 ns
BM_StableSort_uint16_Random_16384                    48628 ns     741072 ns
BM_StableSort_uint16_Random_65536                   439609 ns    3478966 ns
BM_StableSort_uint16_Random_262144                 2336983 ns   15197642 ns
BM_StableSort_uint16_Random_524288                 7888701 ns   34234254 ns
BM_StableSort_uint16_Random_1048576               14865180 ns   68516386 ns
BM_StableSort_uint16_Ascending_1                      3.33 ns       4.00 ns
BM_StableSort_uint16_Ascending_4                      5.79 ns       6.64 ns
BM_StableSort_uint16_Ascending_16                     14.9 ns       15.5 ns
BM_StableSort_uint16_Ascending_64                     50.2 ns       52.5 ns
BM_StableSort_uint16_Ascending_256                     538 ns        546 ns
BM_StableSort_uint16_Ascending_1024                   1645 ns       2652 ns
BM_StableSort_uint16_Ascending_4096                   5559 ns      14517 ns
BM_StableSort_uint16_Ascending_16384                 22803 ns      70275 ns
BM_StableSort_uint16_Ascending_65536                 83109 ns     333446 ns
BM_StableSort_uint16_Ascending_262144               562667 ns    1568670 ns
BM_StableSort_uint16_Ascending_524288              1564646 ns    3059839 ns
BM_StableSort_uint16_Ascending_1048576             3178826 ns    7048327 ns
BM_StableSort_uint16_Descending_1                     3.34 ns       3.93 ns
BM_StableSort_uint16_Descending_4                     8.75 ns       9.73 ns
BM_StableSort_uint16_Descending_16                    55.9 ns       55.5 ns
BM_StableSort_uint16_Descending_64                    1021 ns       1035 ns
BM_StableSort_uint16_Descending_256                   4752 ns       4931 ns
BM_StableSort_uint16_Descending_1024                  2982 ns      19727 ns
BM_StableSort_uint16_Descending_4096                 10432 ns      83165 ns
BM_StableSort_uint16_Descending_16384                56593 ns     326131 ns
BM_StableSort_uint16_Descending_65536               439134 ns    1371346 ns
BM_StableSort_uint16_Descending_262144             1220925 ns    5735665 ns
BM_StableSort_uint16_Descending_524288             2767234 ns   16758330 ns
BM_StableSort_uint16_Descending_1048576            5673769 ns   17541715 ns
BM_StableSort_uint16_SingleElement_1                  3.53 ns       3.73 ns
BM_StableSort_uint16_SingleElement_4                  6.27 ns       5.81 ns
BM_StableSort_uint16_SingleElement_16                 14.8 ns       15.1 ns
BM_StableSort_uint16_SingleElement_64                 51.5 ns       50.9 ns
BM_StableSort_uint16_SingleElement_256                 536 ns        540 ns
BM_StableSort_uint16_SingleElement_1024               1669 ns       2690 ns
BM_StableSort_uint16_SingleElement_4096               5840 ns      14230 ns
BM_StableSort_uint16_SingleElement_16384             22468 ns      68524 ns
BM_StableSort_uint16_SingleElement_65536             89845 ns     332187 ns
BM_StableSort_uint16_SingleElement_262144           590736 ns    1550868 ns
BM_StableSort_uint16_SingleElement_524288          1573677 ns    3095703 ns
BM_StableSort_uint16_SingleElement_1048576         3183421 ns    8251180 ns
BM_StableSort_uint16_PipeOrgan_1                      3.70 ns       3.64 ns
BM_StableSort_uint16_PipeOrgan_4                      7.01 ns       6.81 ns
BM_StableSort_uint16_PipeOrgan_16                     25.7 ns       26.4 ns
BM_StableSort_uint16_PipeOrgan_64                      283 ns        277 ns
BM_StableSort_uint16_PipeOrgan_256                    2562 ns       2852 ns
BM_StableSort_uint16_PipeOrgan_1024                   2863 ns      10892 ns
BM_StableSort_uint16_PipeOrgan_4096                  10585 ns      45668 ns
BM_StableSort_uint16_PipeOrgan_16384                 59151 ns     194358 ns
BM_StableSort_uint16_PipeOrgan_65536                508579 ns     854692 ns
BM_StableSort_uint16_PipeOrgan_262144               901294 ns    3606346 ns
BM_StableSort_uint16_PipeOrgan_524288              2192498 ns   10449279 ns
BM_StableSort_uint16_PipeOrgan_1048576             4204368 ns   11956606 ns
BM_StableSort_uint16_QuickSortAdversary_1             3.20 ns       3.63 ns
BM_StableSort_uint16_QuickSortAdversary_4             5.30 ns       6.38 ns
BM_StableSort_uint16_QuickSortAdversary_16            14.5 ns       15.3 ns
BM_StableSort_uint16_QuickSortAdversary_64             575 ns        611 ns
BM_StableSort_uint16_QuickSortAdversary_256           2423 ns       2577 ns
BM_StableSort_uint16_QuickSortAdversary_1024          2794 ns      16854 ns
BM_StableSort_uint16_QuickSortAdversary_4096         10511 ns      75952 ns
BM_StableSort_uint16_QuickSortAdversary_16384        56214 ns     333824 ns
BM_StableSort_uint16_QuickSortAdversary_65536       422512 ns    1354867 ns
BM_StableSort_uint16_QuickSortAdversary_262144      583301 ns    1564443 ns
BM_StableSort_uint16_QuickSortAdversary_524288     1584319 ns    3265575 ns
BM_StableSort_uint16_QuickSortAdversary_1048576    3197732 ns    7945245 ns
BM_StableSort_int32_Random_1                          3.81 ns       3.70 ns
BM_StableSort_int32_Random_4                          20.8 ns       23.4 ns
BM_StableSort_int32_Random_16                          134 ns        161 ns
BM_StableSort_int32_Random_64                          895 ns        984 ns
BM_StableSort_int32_Random_256                        5640 ns       5897 ns
BM_StableSort_int32_Random_1024                       6994 ns      32118 ns
BM_StableSort_int32_Random_4096                      27367 ns     168960 ns
BM_StableSort_int32_Random_16384                    183261 ns     843240 ns
BM_StableSort_int32_Random_65536                    950914 ns    3953588 ns
BM_StableSort_int32_Random_262144                  3673311 ns   16790171 ns
BM_StableSort_int32_Random_524288                 11515700 ns   36023098 ns
BM_StableSort_int32_Random_1048576                24492515 ns   78116028 ns
BM_StableSort_int32_Ascending_1                       3.31 ns       4.48 ns
BM_StableSort_int32_Ascending_4                       5.96 ns       6.99 ns
BM_StableSort_int32_Ascending_16                      13.0 ns       16.0 ns
BM_StableSort_int32_Ascending_64                      36.7 ns       53.0 ns
BM_StableSort_int32_Ascending_256                      391 ns        471 ns
BM_StableSort_int32_Ascending_1024                    2705 ns       2682 ns
BM_StableSort_int32_Ascending_4096                    8773 ns      14231 ns
BM_StableSort_int32_Ascending_16384                  34709 ns      70625 ns
BM_StableSort_int32_Ascending_65536                 142907 ns     344482 ns
BM_StableSort_int32_Ascending_262144                745483 ns    1591418 ns
BM_StableSort_int32_Ascending_524288               1873701 ns    3190305 ns
BM_StableSort_int32_Ascending_1048576              3851590 ns    7570095 ns
BM_StableSort_int32_Descending_1                      3.22 ns       4.23 ns
BM_StableSort_int32_Descending_4                      7.58 ns       11.2 ns
BM_StableSort_int32_Descending_16                     63.9 ns       58.6 ns
BM_StableSort_int32_Descending_64                     1133 ns       1017 ns
BM_StableSort_int32_Descending_256                    4850 ns       4464 ns
BM_StableSort_int32_Descending_1024                   7023 ns      18954 ns
BM_StableSort_int32_Descending_4096                  28550 ns      75163 ns
BM_StableSort_int32_Descending_16384                200880 ns     341104 ns
BM_StableSort_int32_Descending_65536               1095910 ns    1398021 ns
BM_StableSort_int32_Descending_262144              3818864 ns    5695486 ns
BM_StableSort_int32_Descending_524288              5606779 ns   17593982 ns
BM_StableSort_int32_Descending_1048576            16416366 ns   26649503 ns
BM_StableSort_int32_SingleElement_1                   3.81 ns       3.71 ns
BM_StableSort_int32_SingleElement_4                   6.57 ns       6.61 ns
BM_StableSort_int32_SingleElement_16                  14.0 ns       15.8 ns
BM_StableSort_int32_SingleElement_64                  38.7 ns       53.5 ns
BM_StableSort_int32_SingleElement_256                  386 ns        554 ns
BM_StableSort_int32_SingleElement_1024                2761 ns       3046 ns
BM_StableSort_int32_SingleElement_4096                9179 ns      15188 ns
BM_StableSort_int32_SingleElement_16384              34794 ns      70119 ns
BM_StableSort_int32_SingleElement_65536             135190 ns     354755 ns
BM_StableSort_int32_SingleElement_262144            760995 ns    1644072 ns
BM_StableSort_int32_SingleElement_524288           1969575 ns    3343419 ns
BM_StableSort_int32_SingleElement_1048576          4423816 ns    8346971 ns
BM_StableSort_int32_PipeOrgan_1                       3.79 ns       3.63 ns
BM_StableSort_int32_PipeOrgan_4                       6.21 ns       6.73 ns
BM_StableSort_int32_PipeOrgan_16                      27.5 ns       26.0 ns
BM_StableSort_int32_PipeOrgan_64                       291 ns        265 ns
BM_StableSort_int32_PipeOrgan_256                     2557 ns       2518 ns
BM_StableSort_int32_PipeOrgan_1024                    6765 ns      10976 ns
BM_StableSort_int32_PipeOrgan_4096                   26373 ns      44537 ns
BM_StableSort_int32_PipeOrgan_16384                 201466 ns     188582 ns
BM_StableSort_int32_PipeOrgan_65536                1148533 ns     802368 ns
BM_StableSort_int32_PipeOrgan_262144               2255177 ns    3477829 ns
BM_StableSort_int32_PipeOrgan_524288               3947015 ns   10356637 ns
BM_StableSort_int32_PipeOrgan_1048576             10274312 ns   16405366 ns
BM_StableSort_int32_QuickSortAdversary_1              3.32 ns       4.36 ns
BM_StableSort_int32_QuickSortAdversary_4              5.98 ns       7.44 ns
BM_StableSort_int32_QuickSortAdversary_16             13.0 ns       16.3 ns
BM_StableSort_int32_QuickSortAdversary_64              657 ns        616 ns
BM_StableSort_int32_QuickSortAdversary_256            2569 ns       2483 ns
BM_StableSort_int32_QuickSortAdversary_1024           6898 ns      19635 ns
BM_StableSort_int32_QuickSortAdversary_4096          27092 ns      75108 ns
BM_StableSort_int32_QuickSortAdversary_16384        190379 ns     316463 ns
BM_StableSort_int32_QuickSortAdversary_65536       1109040 ns    1319018 ns
BM_StableSort_int32_QuickSortAdversary_262144      4361925 ns    5472779 ns
BM_StableSort_int32_QuickSortAdversary_524288      6528215 ns   17538983 ns
BM_StableSort_int32_QuickSortAdversary_1048576    18345325 ns   27223926 ns
BM_StableSort_uint32_Random_1                         3.67 ns       3.82 ns
BM_StableSort_uint32_Random_4                         22.3 ns       21.8 ns
BM_StableSort_uint32_Random_16                         155 ns        153 ns
BM_StableSort_uint32_Random_64                         946 ns        976 ns
BM_StableSort_uint32_Random_256                       5824 ns       6019 ns
BM_StableSort_uint32_Random_1024                      4525 ns      32764 ns
BM_StableSort_uint32_Random_4096                     17223 ns     158608 ns
BM_StableSort_uint32_Random_16384                   134821 ns     748525 ns
BM_StableSort_uint32_Random_65536                   716644 ns    3453325 ns
BM_StableSort_uint32_Random_262144                 3628062 ns   16065414 ns
BM_StableSort_uint32_Random_524288                10971334 ns   36567712 ns
BM_StableSort_uint32_Random_1048576               22688377 ns   77533497 ns
BM_StableSort_uint32_Ascending_1                      3.57 ns       3.44 ns
BM_StableSort_uint32_Ascending_4                      5.73 ns       5.33 ns
BM_StableSort_uint32_Ascending_16                     14.5 ns       14.0 ns
BM_StableSort_uint32_Ascending_64                     50.3 ns       51.3 ns
BM_StableSort_uint32_Ascending_256                     465 ns        467 ns
BM_StableSort_uint32_Ascending_1024                   3042 ns       2530 ns
BM_StableSort_uint32_Ascending_4096                   9842 ns      12207 ns
BM_StableSort_uint32_Ascending_16384                 37994 ns      61726 ns
BM_StableSort_uint32_Ascending_65536                148890 ns     294385 ns
BM_StableSort_uint32_Ascending_262144               855080 ns    1422167 ns
BM_StableSort_uint32_Ascending_524288              2154903 ns    3203018 ns
BM_StableSort_uint32_Ascending_1048576             5002518 ns    7563817 ns
BM_StableSort_uint32_Descending_1                     3.51 ns       3.40 ns
BM_StableSort_uint32_Descending_4                     9.09 ns       7.95 ns
BM_StableSort_uint32_Descending_16                    54.8 ns       74.4 ns
BM_StableSort_uint32_Descending_64                    1003 ns       1305 ns
BM_StableSort_uint32_Descending_256                   4545 ns       5300 ns
BM_StableSort_uint32_Descending_1024                  4361 ns      21884 ns
BM_StableSort_uint32_Descending_4096                 16018 ns      90534 ns
BM_StableSort_uint32_Descending_16384               146274 ns     381943 ns
BM_StableSort_uint32_Descending_65536               938248 ns    1536806 ns
BM_StableSort_uint32_Descending_262144             3899300 ns    6387843 ns
BM_StableSort_uint32_Descending_524288             5808157 ns   21959858 ns
BM_StableSort_uint32_Descending_1048576           17520047 ns   26351912 ns
BM_StableSort_uint32_SingleElement_1                  4.03 ns       3.97 ns
BM_StableSort_uint32_SingleElement_4                  6.55 ns       6.41 ns
BM_StableSort_uint32_SingleElement_16                 15.6 ns       15.8 ns
BM_StableSort_uint32_SingleElement_64                 52.3 ns       58.7 ns
BM_StableSort_uint32_SingleElement_256                 473 ns        485 ns
BM_StableSort_uint32_SingleElement_1024               3020 ns       2407 ns
BM_StableSort_uint32_SingleElement_4096               9998 ns      12527 ns
BM_StableSort_uint32_SingleElement_16384             38072 ns      62228 ns
BM_StableSort_uint32_SingleElement_65536            153706 ns     295662 ns
BM_StableSort_uint32_SingleElement_262144           836532 ns    1477099 ns
BM_StableSort_uint32_SingleElement_524288          2144900 ns    3157204 ns
BM_StableSort_uint32_SingleElement_1048576         4995525 ns    7617233 ns
BM_StableSort_uint32_PipeOrgan_1                      4.02 ns       3.99 ns
BM_StableSort_uint32_PipeOrgan_4                      6.97 ns       6.84 ns
BM_StableSort_uint32_PipeOrgan_16                     26.1 ns       29.7 ns
BM_StableSort_uint32_PipeOrgan_64                      266 ns        333 ns
BM_StableSort_uint32_PipeOrgan_256                    2462 ns       2892 ns
BM_StableSort_uint32_PipeOrgan_1024                   4291 ns      12431 ns
BM_StableSort_uint32_PipeOrgan_4096                  15638 ns      51449 ns
BM_StableSort_uint32_PipeOrgan_16384                154563 ns     217460 ns
BM_StableSort_uint32_PipeOrgan_65536                907724 ns     925873 ns
BM_StableSort_uint32_PipeOrgan_262144              2394580 ns    4103575 ns
BM_StableSort_uint32_PipeOrgan_524288              4177145 ns   13947158 ns
BM_StableSort_uint32_PipeOrgan_1048576            11848224 ns   18807297 ns
BM_StableSort_uint32_QuickSortAdversary_1             3.50 ns       3.43 ns
BM_StableSort_uint32_QuickSortAdversary_4             5.88 ns       4.96 ns
BM_StableSort_uint32_QuickSortAdversary_16            14.6 ns       14.0 ns
BM_StableSort_uint32_QuickSortAdversary_64             576 ns        715 ns
BM_StableSort_uint32_QuickSortAdversary_256           2353 ns       2797 ns
BM_StableSort_uint32_QuickSortAdversary_1024          4176 ns      21775 ns
BM_StableSort_uint32_QuickSortAdversary_4096         15565 ns      96188 ns
BM_StableSort_uint32_QuickSortAdversary_16384       149092 ns     398332 ns
BM_StableSort_uint32_QuickSortAdversary_65536       902488 ns    1552393 ns
BM_StableSort_uint32_QuickSortAdversary_262144     3946517 ns    6560414 ns
BM_StableSort_uint32_QuickSortAdversary_524288     6247114 ns   22420977 ns
BM_StableSort_uint32_QuickSortAdversary_1048576   19892446 ns   26529576 ns
BM_StableSort_int64_Random_1                          3.83 ns       3.98 ns
BM_StableSort_int64_Random_4                          21.1 ns       24.0 ns
BM_StableSort_int64_Random_16                          129 ns        136 ns
BM_StableSort_int64_Random_64                          890 ns        906 ns
BM_StableSort_int64_Random_256                        5542 ns       5901 ns
BM_StableSort_int64_Random_1024                      16085 ns      33112 ns
BM_StableSort_int64_Random_4096                      63895 ns     162181 ns
BM_StableSort_int64_Random_16384                    348827 ns     790045 ns
BM_StableSort_int64_Random_65536                   1488237 ns    3557506 ns
BM_StableSort_int64_Random_262144                  8195713 ns   16315808 ns
BM_StableSort_int64_Random_524288                 16586833 ns   38274075 ns
BM_StableSort_int64_Random_1048576                40346644 ns   79182089 ns
BM_StableSort_int64_Ascending_1                       3.76 ns       3.55 ns
BM_StableSort_int64_Ascending_4                       5.82 ns       6.19 ns
BM_StableSort_int64_Ascending_16                      11.7 ns       11.8 ns
BM_StableSort_int64_Ascending_64                      32.9 ns       36.8 ns
BM_StableSort_int64_Ascending_256                      415 ns        550 ns
BM_StableSort_int64_Ascending_1024                    5352 ns       3347 ns
BM_StableSort_int64_Ascending_4096                   17516 ns      19134 ns
BM_StableSort_int64_Ascending_16384                  64147 ns      91099 ns
BM_StableSort_int64_Ascending_65536                 322126 ns     434009 ns
BM_StableSort_int64_Ascending_262144               1554669 ns    2057056 ns
BM_StableSort_int64_Ascending_524288               3656527 ns    5016650 ns
BM_StableSort_int64_Ascending_1048576             10469979 ns   12908613 ns
BM_StableSort_int64_Descending_1                      4.09 ns       3.35 ns
BM_StableSort_int64_Descending_4                      9.13 ns       8.01 ns
BM_StableSort_int64_Descending_16                     76.8 ns       92.9 ns
BM_StableSort_int64_Descending_64                     1336 ns       1417 ns
BM_StableSort_int64_Descending_256                    5525 ns       5674 ns
BM_StableSort_int64_Descending_1024                  17461 ns      22558 ns
BM_StableSort_int64_Descending_4096                  64285 ns     102360 ns
BM_StableSort_int64_Descending_16384                336946 ns     388940 ns
BM_StableSort_int64_Descending_65536                837912 ns    1662169 ns
BM_StableSort_int64_Descending_262144              3680806 ns    7494323 ns
BM_StableSort_int64_Descending_524288             11023784 ns   24935033 ns
BM_StableSort_int64_Descending_1048576            20023568 ns   33220712 ns
BM_StableSort_int64_SingleElement_1                   3.37 ns       3.98 ns
BM_StableSort_int64_SingleElement_4                   5.32 ns       6.92 ns
BM_StableSort_int64_SingleElement_16                  10.9 ns       13.3 ns
BM_StableSort_int64_SingleElement_64                  32.1 ns       43.8 ns
BM_StableSort_int64_SingleElement_256                  420 ns        541 ns
BM_StableSort_int64_SingleElement_1024                5689 ns       3381 ns
BM_StableSort_int64_SingleElement_4096               19199 ns      17989 ns
BM_StableSort_int64_SingleElement_16384              75754 ns      91963 ns
BM_StableSort_int64_SingleElement_65536             357106 ns     500326 ns
BM_StableSort_int64_SingleElement_262144           1672975 ns    2417734 ns
BM_StableSort_int64_SingleElement_524288           3642891 ns    5200878 ns
BM_StableSort_int64_SingleElement_1048576         11172007 ns   13729511 ns
BM_StableSort_int64_PipeOrgan_1                       3.38 ns       3.94 ns
BM_StableSort_int64_PipeOrgan_4                       5.73 ns       6.44 ns
BM_StableSort_int64_PipeOrgan_16                      27.5 ns       29.0 ns
BM_StableSort_int64_PipeOrgan_64                       310 ns        321 ns
BM_StableSort_int64_PipeOrgan_256                     2761 ns       2918 ns
BM_StableSort_int64_PipeOrgan_1024                   16105 ns      12525 ns
BM_StableSort_int64_PipeOrgan_4096                   65289 ns      59990 ns
BM_StableSort_int64_PipeOrgan_16384                 341757 ns     270636 ns
BM_StableSort_int64_PipeOrgan_65536                 587452 ns    1126132 ns
BM_StableSort_int64_PipeOrgan_262144               2837955 ns    5034180 ns
BM_StableSort_int64_PipeOrgan_524288               6617313 ns   15267354 ns
BM_StableSort_int64_PipeOrgan_1048576             15208796 ns   23162989 ns
BM_StableSort_int64_QuickSortAdversary_1              3.77 ns       3.45 ns
BM_StableSort_int64_QuickSortAdversary_4              5.55 ns       5.20 ns
BM_StableSort_int64_QuickSortAdversary_16             12.5 ns       11.5 ns
BM_StableSort_int64_QuickSortAdversary_64              646 ns        750 ns
BM_StableSort_int64_QuickSortAdversary_256            2655 ns       3539 ns
BM_StableSort_int64_QuickSortAdversary_1024          16373 ns      22349 ns
BM_StableSort_int64_QuickSortAdversary_4096          62306 ns      97248 ns
BM_StableSort_int64_QuickSortAdversary_16384        321755 ns     388084 ns
BM_StableSort_int64_QuickSortAdversary_65536       1374694 ns    1596091 ns
BM_StableSort_int64_QuickSortAdversary_262144      4374661 ns    6894139 ns
BM_StableSort_int64_QuickSortAdversary_524288     12736074 ns   23932229 ns
BM_StableSort_int64_QuickSortAdversary_1048576    22615219 ns   33355629 ns
BM_StableSort_uint64_Random_1                         3.82 ns       3.49 ns
BM_StableSort_uint64_Random_4                         22.4 ns       23.4 ns
BM_StableSort_uint64_Random_16                         154 ns        146 ns
BM_StableSort_uint64_Random_64                         924 ns        926 ns
BM_StableSort_uint64_Random_256                       5864 ns       5913 ns
BM_StableSort_uint64_Random_1024                      7168 ns      31746 ns
BM_StableSort_uint64_Random_4096                     27668 ns     154224 ns
BM_StableSort_uint64_Random_16384                   219526 ns     755205 ns
BM_StableSort_uint64_Random_65536                   965251 ns    3490165 ns
BM_StableSort_uint64_Random_262144                 6262162 ns   15889589 ns
BM_StableSort_uint64_Random_524288                12530078 ns   36458581 ns
BM_StableSort_uint64_Random_1048576               38462191 ns   75168445 ns
BM_StableSort_uint64_Ascending_1                      3.30 ns       3.35 ns
BM_StableSort_uint64_Ascending_4                      5.65 ns       5.84 ns
BM_StableSort_uint64_Ascending_16                     14.7 ns       12.6 ns
BM_StableSort_uint64_Ascending_64                     55.3 ns       34.6 ns
BM_StableSort_uint64_Ascending_256                     513 ns        533 ns
BM_StableSort_uint64_Ascending_1024                   5541 ns       3189 ns
BM_StableSort_uint64_Ascending_4096                  17706 ns      20326 ns
BM_StableSort_uint64_Ascending_16384                 66420 ns      93757 ns
BM_StableSort_uint64_Ascending_65536                341425 ns     435016 ns
BM_StableSort_uint64_Ascending_262144              1595691 ns    2088317 ns
BM_StableSort_uint64_Ascending_524288              3808703 ns    5092832 ns
BM_StableSort_uint64_Ascending_1048576            11060417 ns   13023250 ns
BM_StableSort_uint64_Descending_1                     3.29 ns       3.35 ns
BM_StableSort_uint64_Descending_4                     8.65 ns       7.92 ns
BM_StableSort_uint64_Descending_16                    54.7 ns       80.2 ns
BM_StableSort_uint64_Descending_64                    1028 ns       1307 ns
BM_StableSort_uint64_Descending_256                   4521 ns       5635 ns
BM_StableSort_uint64_Descending_1024                  7122 ns      23323 ns
BM_StableSort_uint64_Descending_4096                 30538 ns      95892 ns
BM_StableSort_uint64_Descending_16384               195565 ns     392721 ns
BM_StableSort_uint64_Descending_65536               852002 ns    1720358 ns
BM_StableSort_uint64_Descending_262144             3737884 ns    7484130 ns
BM_StableSort_uint64_Descending_524288            11159345 ns   25690770 ns
BM_StableSort_uint64_Descending_1048576           20648864 ns   33057383 ns
BM_StableSort_uint64_SingleElement_1                  3.62 ns       4.10 ns
BM_StableSort_uint64_SingleElement_4                  6.73 ns       6.64 ns
BM_StableSort_uint64_SingleElement_16                 14.9 ns       11.3 ns
BM_StableSort_uint64_SingleElement_64                 52.0 ns       33.0 ns
BM_StableSort_uint64_SingleElement_256                 511 ns        582 ns
BM_StableSort_uint64_SingleElement_1024               6499 ns       3287 ns
BM_StableSort_uint64_SingleElement_4096              22190 ns      17616 ns
BM_StableSort_uint64_SingleElement_16384             84378 ns      86885 ns
BM_StableSort_uint64_SingleElement_65536            466257 ns     457144 ns
BM_StableSort_uint64_SingleElement_262144          1993687 ns    2361999 ns
BM_StableSort_uint64_SingleElement_524288          4759565 ns    5096771 ns
BM_StableSort_uint64_SingleElement_1048576        12426111 ns   13468453 ns
BM_StableSort_uint64_PipeOrgan_1                      3.73 ns       3.94 ns
BM_StableSort_uint64_PipeOrgan_4                      7.18 ns       7.54 ns
BM_StableSort_uint64_PipeOrgan_16                     25.2 ns       29.1 ns
BM_StableSort_uint64_PipeOrgan_64                      260 ns        321 ns
BM_StableSort_uint64_PipeOrgan_256                    2468 ns       2970 ns
BM_StableSort_uint64_PipeOrgan_1024                   7025 ns      12912 ns
BM_StableSort_uint64_PipeOrgan_4096                  28968 ns      53379 ns
BM_StableSort_uint64_PipeOrgan_16384                194156 ns     239790 ns
BM_StableSort_uint64_PipeOrgan_65536                599491 ns     993800 ns
BM_StableSort_uint64_PipeOrgan_262144              2648585 ns    4689680 ns
BM_StableSort_uint64_PipeOrgan_524288              7621109 ns   15401808 ns
BM_StableSort_uint64_PipeOrgan_1048576            15608814 ns   23484821 ns
BM_StableSort_uint64_QuickSortAdversary_1             3.38 ns       3.54 ns
BM_StableSort_uint64_QuickSortAdversary_4             5.50 ns       6.03 ns
BM_StableSort_uint64_QuickSortAdversary_16            14.2 ns       11.0 ns
BM_StableSort_uint64_QuickSortAdversary_64             597 ns        688 ns
BM_StableSort_uint64_QuickSortAdversary_256           2446 ns       2818 ns
BM_StableSort_uint64_QuickSortAdversary_1024          7266 ns      20319 ns
BM_StableSort_uint64_QuickSortAdversary_4096         31155 ns      89112 ns
BM_StableSort_uint64_QuickSortAdversary_16384       201033 ns     390574 ns
BM_StableSort_uint64_QuickSortAdversary_65536       871014 ns    1685639 ns
BM_StableSort_uint64_QuickSortAdversary_262144     3978535 ns    7265830 ns
BM_StableSort_uint64_QuickSortAdversary_524288    10279721 ns   25350004 ns
BM_StableSort_uint64_QuickSortAdversary_1048576   20256585 ns   33054393 ns
```
---
 libcxx/docs/ReleaseNotes/19.rst               |   2 +
 libcxx/include/CMakeLists.txt                 |   1 +
 libcxx/include/__algorithm/radix_sort.h       | 332 ++++++++++++++++++
 libcxx/include/__algorithm/stable_sort.h      |  44 +++
 libcxx/include/__bit/bit_log2.h               |  11 +-
 libcxx/include/module.modulemap               |   1 +
 .../alg.sort/stable.sort/stable_sort.pass.cpp | 117 +++---
 7 files changed, 449 insertions(+), 59 deletions(-)
 create mode 100644 libcxx/include/__algorithm/radix_sort.h

diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index e8f76773c5437..aec7865d52fad 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -126,6 +126,8 @@ Improvements and New Features
 
 - In C++23 and C++26 the number of transitive includes in several headers has been reduced, improving the compilation speed.
 
+- ``std::stable_sort`` uses radix sort for integral types now, which can improve the performance up to 10 times, depending
+  on type of sorted elements and the initial state of the sorted array.
 
 Deprecations and Removals
 -------------------------
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 0b484ebe5e87c..96071527b5283 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -73,6 +73,7 @@ set(files
   __algorithm/prev_permutation.h
   __algorithm/pstl.h
   __algorithm/push_heap.h
+  __algorithm/radix_sort.h
   __algorithm/ranges_adjacent_find.h
   __algorithm/ranges_all_of.h
   __algorithm/ranges_any_of.h
diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h
new file mode 100644
index 0000000000000..c39b26a8620f4
--- /dev/null
+++ b/libcxx/include/__algorithm/radix_sort.h
@@ -0,0 +1,332 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RADIX_SORT_H
+#define _LIBCPP___ALGORITHM_RADIX_SORT_H
+
+// This is an implementation of classic LSD radix sort algorithm, running in linear time and using `O(max(N, M))`
+// additional memory, where `N` is size of an input range, `M` - maximum value of
+// a radix of the sorted integer type. Type of the radix and its maximum value are determined at compile time
+// based on type returned by function `__radix`. The default radix is uint8.
+
+// The algorithm is equivalent to several consecutive calls of counting sort for each
+// radix of the sorted numbers from low to high byte.
+// The algorithm uses a temporary buffer of size equal to size of the input range. Each `i`-th pass
+// of the algorithm sorts values by `i`-th radix and moves values to the temporary buffer (for each even `i`, counted
+// from zero), or moves them back to the initial range (for each odd `i`). If there is only one radix in sorted integers
+// (e.g. int8), the sorted values are placed to the buffer, and then moved back to the initial range.
+
+// The implementation also has several optimizations:
+// - the counters for the counting sort are calculated in one pass for all radices;
+// - if all values of a radix are the same, we do not sort that radix, and just move items to the buffer;
+// - if two consecutive radices satisfies condition above, we do nothing for these two radices.
+
+#include <__algorithm/for_each.h>
+#include <__algorithm/move.h>
+#include <__bit/bit_log2.h>
+#include <__bit/countl.h>
+#include <__config>
+#include <__functional/identity.h>
+#include <__iterator/distance.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/move_iterator.h>
+#include <__iterator/next.h>
+#include <__iterator/reverse_iterator.h>
+#include <__numeric/partial_sum.h>
+#include <__type_traits/decay.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_assignable.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_unsigned.h>
+#include <__type_traits/make_unsigned.h>
+#include <__utility/forward.h>
+#include <__utility/integer_sequence.h>
+#include <__utility/move.h>
+#include <__utility/pair.h>
+#include <climits>
+#include <cstdint>
+#include <initializer_list>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 14
+
+template <class _InputIterator, class _OutputIterator>
+_LIBCPP_HIDE_FROM_ABI pair<_OutputIterator, __iter_value_type<_InputIterator>>
+__partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator __result) {
+  if (__first == __last)
+    return {__result, 0};
+
+  auto __max                              = *__first;
+  __iter_value_type<_InputIterator> __sum = *__first;
+  *__result                               = __sum;
+
+  while (++__first != __last) {
+    if (__max < *__first) {
+      __max = *__first;
+    }
+    __sum       = std::move(__sum) + *__first;
+    *++__result = __sum;
+  }
+  return {++__result, __max};
+}
+
+template <class _Value, class _Map, class _Radix>
+struct __radix_sort_traits {
+  using __image_type = decay_t<typename __invoke_of<_Map, _Value>::type>;
+  static_assert(is_unsigned<__image_type>::value);
+
+  using __radix_type = decay_t<typename __invoke_of<_Radix, __image_type>::type>;
+  static_assert(is_integral<__radix_type>::value);
+
+  static constexpr auto __radix_value_range = numeric_limits<__radix_type>::max() + 1;
+  static constexpr auto __radix_size        = std::__bit_log2<uint64_t>(__radix_value_range);
+  static constexpr auto __radix_count       = sizeof(__image_type) * CHAR_BIT / __radix_size;
+};
+
+template <class _Value, class _Map>
+struct __counting_sort_traits {
+  using __image_type = decay_t<typename __invoke_of<_Map, _Value>::type>;
+  static_assert(is_unsigned<__image_type>::value);
+
+  static constexpr const auto __value_range = numeric_limits<__image_type>::max() + 1;
+  static constexpr auto __radix_size        = std::__bit_log2<uint64_t>(__value_range);
+};
+
+template <class _Radix, class _Integer>
+_LIBCPP_HIDE_FROM_ABI auto __nth_radix(size_t __radix_number, _Radix __radix, _Integer __n) {
+  static_assert(is_unsigned<_Integer>::value);
+  using __traits = __counting_sort_traits<_Integer, _Radix>;
+
+  return __radix(static_cast<_Integer>(__n >> __traits::__radix_size * __radix_number));
+}
+
+template <class _ForwardIterator, class _Map, class _RandomAccessIterator>
+_LIBCPP_HIDE_FROM_ABI void
+__collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) {
+  using __value_type = __iter_value_type<_ForwardIterator>;
+  using __traits     = __counting_sort_traits<__value_type, _Map>;
+
+  std::for_each(__first, __last, [&__counters, &__map](const auto& __preimage) { ++__counters[__map(__preimage)]; });
+
+  const auto __counters_end = __counters + __traits::__value_range;
+  std::partial_sum(__counters, __counters_end, __counters);
+}
+
+template <class _ForwardIterator, class _RandomAccessIterator1, class _Map, class _RandomAccessIterator2>
+_LIBCPP_HIDE_FROM_ABI void
+__dispose(_ForwardIterator __first,
+          _ForwardIterator __last,
+          _RandomAccessIterator1 __result,
+          _Map __map,
+          _RandomAccessIterator2 __counters) {
+  std::for_each(__first, __last, [&__result, &__counters, &__map](auto&& __preimage) {
+    auto __index      = __counters[__map(__preimage)]++;
+    __result[__index] = std::move(__preimage);
+  });
+}
+
+template <class _ForwardIterator,
+          class _Map,
+          class _Radix,
+          class _RandomAccessIterator1,
+          class _RandomAccessIterator2,
+          size_t... _Radices>
+_LIBCPP_HIDE_FROM_ABI bool __collect_impl(
+    _ForwardIterator __first,
+    _ForwardIterator __last,
+    _Map __map,
+    _Radix __radix,
+    _RandomAccessIterator1 __counters,
+    _RandomAccessIterator2 __maximums,
+    index_sequence<_Radices...>) {
+  using __value_type                 = __iter_value_type<_ForwardIterator>;
+  constexpr auto __radix_value_range = __radix_sort_traits<__value_type, _Map, _Radix>::__radix_value_range;
+
+  auto __previous  = numeric_limits<typename __invoke_of<_Map, __value_type>::type>::min();
+  auto __is_sorted = true;
+  std::for_each(__first, __last, [&__counters, &__map, &__radix, &__previous, &__is_sorted](const auto& __value) {
+    auto __current = __map(__value);
+    __is_sorted &= (__current >= __previous);
+    __previous = __current;
+
+    (++__counters[_Radices][std::__nth_radix(_Radices, __radix, __current)], ...);
+  });
+
+  ((__maximums[_Radices] =
+        std::__partial_sum_max(__counters[_Radices], __counters[_Radices] + __radix_value_range, __counters[_Radices])
+            .second),
+   ...);
+
+  return __is_sorted;
+}
+
+template <class _ForwardIterator, class _Map, class _Radix, class _RandomAccessIterator1, class _RandomAccessIterator2>
+_LIBCPP_HIDE_FROM_ABI bool
+__collect(_ForwardIterator __first,
+          _ForwardIterator __last,
+          _Map __map,
+          _Radix __radix,
+          _RandomAccessIterator1 __counters,
+          _RandomAccessIterator2 __maximums) {
+  using __value_type           = __iter_value_type<_ForwardIterator>;
+  constexpr auto __radix_count = __radix_sort_traits<__value_type, _Map, _Radix>::__radix_count;
+  return std::__collect_impl(
+      __first, __last, __map, __radix, __counters, __maximums, make_index_sequence<__radix_count>());
+}
+
+template <class _BidirectionalIterator, class _RandomAccessIterator1, class _Map, class _RandomAccessIterator2>
+_LIBCPP_HIDE_FROM_ABI void __dispose_backward(
+    _BidirectionalIterator __first,
+    _BidirectionalIterator __last,
+    _RandomAccessIterator1 __result,
+    _Map __map,
+    _RandomAccessIterator2 __counters) {
+  std::for_each(std::make_reverse_iterator(__last),
+                std::make_reverse_iterator(__first),
+                [&__result, &__counters, &__map](auto&& __preimage) {
+                  auto __index      = --__counters[__map(__preimage)];
+                  __result[__index] = std::move(__preimage);
+                });
+}
+
+template <class _ForwardIterator, class _RandomAccessIterator, class _Map>
+_LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
+__counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __result, _Map __map) {
+  using __value_type = __iter_value_type<_ForwardIterator>;
+  using __traits     = __counting_sort_traits<__value_type, _Map>;
+
+  __iter_diff_t<_RandomAccessIterator> __counters[__traits::__value_range + 1] = {0};
+
+  std::__collect(__first, __last, __map, std::next(std::begin(__counters)));
+  std::__dispose(__first, __last, __result, __map, std::begin(__counters));
+
+  return __result + __counters[__traits::__value_range];
+}
+
+template <class _RandomAccessIterator1,
+          class _RandomAccessIterator2,
+          class _Map,
+          class _Radix,
+          enable_if_t< __radix_sort_traits<__iter_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count == 1,
+                       int> = 0>
+_LIBCPP_HIDE_FROM_ABI void __radix_sort_impl(
+    _RandomAccessIterator1 __first,
+    _RandomAccessIterator1 __last,
+    _RandomAccessIterator2 __buffer,
+    _Map __map,
+    _Radix __radix) {
+  auto __buffer_end = std::__counting_sort_impl(__first, __last, __buffer, [&__map, &__radix](const auto& __value) {
+    return __radix(__map(__value));
+  });
+
+  std::move(__buffer, __buffer_end, __first);
+}
+
+template <
+    class _RandomAccessIterator1,
+    class _RandomAccessIterator2,
+    class _Map,
+    class _Radix,
+    enable_if_t< __radix_sort_traits<__iter_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count % 2 == 0,
+                 int> = 0 >
+_LIBCPP_HIDE_FROM_ABI void __radix_sort_impl(
+    _RandomAccessIterator1 __first,
+    _RandomAccessIterator1 __last,
+    _RandomAccessIterator2 __buffer_begin,
+    _Map __map,
+    _Radix __radix) {
+  using __value_type = __iter_value_type<_RandomAccessIterator1>;
+  using __traits     = __radix_sort_traits<__value_type, _Map, _Radix>;
+
+  __iter_diff_t<_RandomAccessIterator1> __counters[__traits::__radix_count][__traits::__radix_value_range] = {{0}};
+  __iter_diff_t<_RandomAccessIterator1> __maximums[__traits::__radix_count]                                = {0};
+  const auto __is_sorted = std::__collect(__first, __last, __map, __radix, __counters, __maximums);
+  if (!__is_sorted) {
+    const auto __range_size = std::distance(__first, __last);
+    auto __buffer_end       = __buffer_begin + __range_size;
+    for (size_t __radix_number = 0; __radix_number < __traits::__radix_count; __radix_number += 2) {
+      const auto __n0th_is_single = __maximums[__radix_number] == __range_size;
+      const auto __n1th_is_single = __maximums[__radix_number + 1] == __range_size;
+
+      if (__n0th_is_single && __n1th_is_single) {
+        continue;
+      }
+
+      if (__n0th_is_single) {
+        std::move(__first, __last, __buffer_begin);
+      } else {
+        auto __n0th = [__radix_number, &__map, &__radix](const auto& __v) {
+          return std::__nth_radix(__radix_number, __radix, __map(__v));
+        };
+        std::__dispose_backward(__first, __last, __buffer_begin, __n0th, __counters[__radix_number]);
+      }
+
+      if (__n1th_is_single) {
+        std::move(__buffer_begin, __buffer_end, __first);
+      } else {
+        auto __n1th = [__radix_number, &__map, &__radix](const auto& __v) {
+          return std::__nth_radix(__radix_number + 1, __radix, __map(__v));
+        };
+        std::__dispose_backward(__buffer_begin, __buffer_end, __first, __n1th, __counters[__radix_number + 1]);
+      }
+    }
+  }
+}
+
+_LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(bool __b) { return __b; }
+
+template <class _Ip>
+_LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(_Ip __n) {
+  constexpr const auto __min_value = numeric_limits<_Ip>::min();
+  return static_cast<make_unsigned_t<_Ip> >(__n ^ __min_value);
+}
+
+struct __low_byte_fn {
+  template <class _Ip>
+  _LIBCPP_HIDE_FROM_ABI constexpr uint8_t operator()(_Ip __integer) const {
+    static_assert(is_unsigned<_Ip>::value);
+
+    return static_cast<uint8_t>(__integer & 0xff);
+  }
+};
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Map, class _Radix>
+_LIBCPP_HIDE_FROM_ABI void
+__radix_sort(_RandomAccessIterator1 __first,
+             _RandomAccessIterator1 __last,
+             _RandomAccessIterator2 __buffer,
+             _Map __map,
+             _Radix __radix) {
+  auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__shift_to_unsigned(__map(__x)); };
+  std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix);
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2>
+_LIBCPP_HIDE_FROM_ABI void
+__radix_sort(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer) {
+  std::__radix_sort(__first, __last, __buffer, __identity{}, __low_byte_fn{});
+}
+
+#endif // _LIBCPP_STD_VER >= 14
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___ALGORITHM_RADIX_SORT_H
diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h
index 1111f5509bc38..70a85023a17f0 100644
--- a/libcxx/include/__algorithm/stable_sort.h
+++ b/libcxx/include/__algorithm/stable_sort.h
@@ -13,6 +13,7 @@
 #include <__algorithm/comp_ref_type.h>
 #include <__algorithm/inplace_merge.h>
 #include <__algorithm/iterator_operations.h>
+#include <__algorithm/radix_sort.h>
 #include <__algorithm/sort.h>
 #include <__config>
 #include <__cstddef/ptrdiff_t.h>
@@ -21,7 +22,12 @@
 #include <__memory/destruct_n.h>
 #include <__memory/unique_ptr.h>
 #include <__memory/unique_temporary_buffer.h>
+#include <__type_traits/desugars_to.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_same.h>
 #include <__type_traits/is_trivially_assignable.h>
+#include <__type_traits/remove_cvref.h>
 #include <__utility/move.h>
 #include <__utility/pair.h>
 
@@ -189,6 +195,28 @@ struct __stable_sort_switch {
   static const unsigned value = 128 * is_trivially_copy_assignable<_Tp>::value;
 };
 
+#if _LIBCPP_STD_VER >= 17
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() {
+  static_assert(is_integral<_Tp>::value);
+  if constexpr (sizeof(_Tp) == 1) {
+    return 1 << 8;
+  }
+
+  return 1 << 10;
+}
+
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_max_bound() {
+  static_assert(is_integral<_Tp>::value);
+  if constexpr (sizeof(_Tp) >= 8) {
+    return 1 << 15;
+  }
+
+  return 1 << 16;
+}
+#endif // _LIBCPP_STD_VER >= 17
+
 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
 void __stable_sort(_RandomAccessIterator __first,
                    _RandomAccessIterator __last,
@@ -211,6 +239,22 @@ void __stable_sort(_RandomAccessIterator __first,
     std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
     return;
   }
+
+#if _LIBCPP_STD_VER >= 17
+  constexpr auto __default_comp =
+      __desugars_to_v<__totally_ordered_less_tag, __remove_cvref_t<_Compare>, value_type, value_type >;
+  constexpr auto __integral_value =
+      is_integral_v<value_type > && is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>;
+  constexpr auto __allowed_radix_sort = __default_comp && __integral_value;
+  if constexpr (__allowed_radix_sort) {
+    if (__len <= __buff_size && __len >= static_cast<difference_type>(__radix_sort_min_bound<value_type>()) &&
+        __len <= static_cast<difference_type>(__radix_sort_max_bound<value_type>())) {
+      std::__radix_sort(__first, __last, __buff);
+      return;
+    }
+  }
+#endif // _LIBCPP_STD_VER >= 17
+
   typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2;
   _RandomAccessIterator __m                                             = __first + __l2;
   if (__len <= __buff_size) {
diff --git a/libcxx/include/__bit/bit_log2.h b/libcxx/include/__bit/bit_log2.h
index 62936f6786860..94ee6c3b2bb1d 100644
--- a/libcxx/include/__bit/bit_log2.h
+++ b/libcxx/include/__bit/bit_log2.h
@@ -10,8 +10,8 @@
 #define _LIBCPP___BIT_BIT_LOG2_H
 
 #include <__bit/countl.h>
-#include <__concepts/arithmetic.h>
 #include <__config>
+#include <__type_traits/is_unsigned_integer.h>
 #include <limits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -20,14 +20,15 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 20
+#if _LIBCPP_STD_VER >= 14
 
-template <__libcpp_unsigned_integer _Tp>
+template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI constexpr _Tp __bit_log2(_Tp __t) noexcept {
-  return numeric_limits<_Tp>::digits - 1 - std::countl_zero(__t);
+  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__bit_log2 requires an unsigned integer type");
+  return numeric_limits<_Tp>::digits - 1 - std::__countl_zero(__t);
 }
 
-#endif // _LIBCPP_STD_VER >= 20
+#endif // _LIBCPP_STD_VER >= 14
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index 86efbd36b20d1..70aee7da79cba 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -488,6 +488,7 @@ module std [system] {
     module prev_permutation                       { header "__algorithm/prev_permutation.h" }
     module pstl                                   { header "__algorithm/pstl.h" }
     module push_heap                              { header "__algorithm/push_heap.h" }
+    module radix_sort                             { header "__algorithm/radix_sort.h" }
     module ranges_adjacent_find                   { header "__algorithm/ranges_adjacent_find.h" }
     module ranges_all_of                          { header "__algorithm/ranges_all_of.h" }
     module ranges_any_of                          { header "__algorithm/ranges_any_of.h" }
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
index 4301d22027de8..621234354092d 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
@@ -50,16 +50,16 @@ template <class RI>
 void
 test_sort_driver_driver(RI f, RI l, int start, RI real_last)
 {
-    for (RI i = l; i > f + start;)
-    {
-        *--i = start;
-        if (f == i)
-        {
-            test_sort_helper(f, real_last);
-        }
+  using value_type = typename std::iterator_traits<RI>::value_type;
+
+  for (RI i = l; i > f + start;) {
+    *--i = static_cast<value_type>(start);
+    if (f == i) {
+      test_sort_helper(f, real_last);
+    }
     if (start > 0)
         test_sort_driver_driver(f, i, start-1, real_last);
-    }
+  }
 }
 
 template <class RI>
@@ -69,55 +69,61 @@ test_sort_driver(RI f, RI l, int start)
     test_sort_driver_driver(f, l, start, l);
 }
 
+template <int sa, class V>
+void test_sort_() {
+  V ia[sa];
+  for (int i = 0; i < sa; ++i) {
+    test_sort_driver(ia, ia + sa, i);
+  }
+}
+
 template <int sa>
-void
-test_sort_()
-{
-    int ia[sa];
-    for (int i = 0; i < sa; ++i)
-    {
-        test_sort_driver(ia, ia+sa, i);
-    }
+void test_sort_() {
+  test_sort_<sa, int>();
+  test_sort_<sa, float>();
 }
 
-void
-test_larger_sorts(int N, int M)
-{
-    assert(N != 0);
-    assert(M != 0);
-    // create array length N filled with M different numbers
-    int* array = new int[N];
-    int x = 0;
-    for (int i = 0; i < N; ++i)
-    {
-        array[i] = x;
-        if (++x == M)
-            x = 0;
-    }
-    // test saw tooth pattern
-    std::stable_sort(array, array+N);
-    assert(std::is_sorted(array, array+N));
-    // test random pattern
-    std::shuffle(array, array+N, randomness);
-    std::stable_sort(array, array+N);
-    assert(std::is_sorted(array, array+N));
-    // test sorted pattern
-    std::stable_sort(array, array+N);
-    assert(std::is_sorted(array, array+N));
-    // test reverse sorted pattern
-    std::reverse(array, array+N);
-    std::stable_sort(array, array+N);
-    assert(std::is_sorted(array, array+N));
-    // test swap ranges 2 pattern
-    std::swap_ranges(array, array+N/2, array+N/2);
-    std::stable_sort(array, array+N);
-    assert(std::is_sorted(array, array+N));
-    // test reverse swap ranges 2 pattern
-    std::reverse(array, array+N);
-    std::swap_ranges(array, array+N/2, array+N/2);
-    std::stable_sort(array, array+N);
-    assert(std::is_sorted(array, array+N));
-    delete [] array;
+template <class V>
+void test_larger_sorts(int N, int M) {
+  assert(N != 0);
+  assert(M != 0);
+  // create array length N filled with M different numbers
+  V* array = new V[N];
+  int x    = 0;
+  for (int i = 0; i < N; ++i) {
+    array[i] = static_cast<V>(x);
+    if (++x == M)
+      x = 0;
+  }
+  // test saw tooth pattern
+  std::stable_sort(array, array + N);
+  assert(std::is_sorted(array, array + N));
+  // test random pattern
+  std::shuffle(array, array + N, randomness);
+  std::stable_sort(array, array + N);
+  assert(std::is_sorted(array, array + N));
+  // test sorted pattern
+  std::stable_sort(array, array + N);
+  assert(std::is_sorted(array, array + N));
+  // test reverse sorted pattern
+  std::reverse(array, array + N);
+  std::stable_sort(array, array + N);
+  assert(std::is_sorted(array, array + N));
+  // test swap ranges 2 pattern
+  std::swap_ranges(array, array + N / 2, array + N / 2);
+  std::stable_sort(array, array + N);
+  assert(std::is_sorted(array, array + N));
+  // test reverse swap ranges 2 pattern
+  std::reverse(array, array + N);
+  std::swap_ranges(array, array + N / 2, array + N / 2);
+  std::stable_sort(array, array + N);
+  assert(std::is_sorted(array, array + N));
+  delete[] array;
+}
+
+void test_larger_sorts(int N, int M) {
+  test_larger_sorts<int>(N, M);
+  test_larger_sorts<float>(N, M);
 }
 
 void
@@ -156,6 +162,9 @@ int main(int, char**)
     test_larger_sorts(997);
     test_larger_sorts(1000);
     test_larger_sorts(1009);
+    test_larger_sorts(1024);
+    test_larger_sorts(1031);
+    test_larger_sorts(2053);
 
 #if !defined(TEST_HAS_NO_EXCEPTIONS)
     { // check that the algorithm works without memory

From 0e1c5bfac8574bc8419968279069c76b55e5f270 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Thu, 9 Jan 2025 18:02:43 +0000
Subject: [PATCH 20/79] [gn build] Port 69b54c1a05c0

---
 llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index d4f0a2924ab4d..1144402befa4d 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -145,6 +145,7 @@ if (current_toolchain == default_toolchain) {
       "__algorithm/prev_permutation.h",
       "__algorithm/pstl.h",
       "__algorithm/push_heap.h",
+      "__algorithm/radix_sort.h",
       "__algorithm/ranges_adjacent_find.h",
       "__algorithm/ranges_all_of.h",
       "__algorithm/ranges_any_of.h",

From a6b7181733c83523a39d4f4e788c6b7a227d477d Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi@microsoft.com>
Date: Thu, 9 Jan 2025 13:11:52 -0500
Subject: [PATCH 21/79] [HLSL] Move length support out of the DirectX Backend 
 (#121611)

## Changes
- Delete DirectX length intrinsic
- Delete HLSL length lang builtin
- Implement length algorithm entirely in the header.

## History
- In the past if an HLSL intrinsic lowered to either a spirv op code or
a DXIL opcode we represented it with intrinsics

## Why we are moving away?
- To make HLSL apis more portable the team decided that it makes sense
for some intrinsics to be defined only in the header.
- Since there tends to be more SPIRV opcodes than DXIL opcodes the plan
is to support SPIRV opcodes either with target specific builtins or via
pattern matching.
---
 clang/include/clang/Basic/Builtins.td         |   6 -
 clang/lib/CodeGen/CGBuiltin.cpp               |  14 --
 clang/lib/CodeGen/CGHLSLRuntime.h             |   1 -
 clang/lib/Headers/hlsl/hlsl_detail.h          |  23 ++
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      |  29 +--
 clang/lib/Sema/SemaHLSL.cpp                   |  18 --
 clang/test/CodeGenHLSL/builtins/length.hlsl   | 203 +++++++++++-------
 .../test/SemaHLSL/BuiltIns/length-errors.hlsl |  51 +++--
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   1 -
 .../Target/DirectX/DXILIntrinsicExpansion.cpp |  30 ---
 llvm/test/CodeGen/DirectX/length.ll           | 116 ----------
 llvm/test/CodeGen/DirectX/length_error.ll     |  10 -
 .../DirectX/length_invalid_intrinsic_error.ll |  10 -
 .../length_invalid_intrinsic_error_scalar.ll  |  10 -
 14 files changed, 198 insertions(+), 324 deletions(-)
 delete mode 100644 llvm/test/CodeGen/DirectX/length.ll
 delete mode 100644 llvm/test/CodeGen/DirectX/length_error.ll
 delete mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
 delete mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 468c16050e2bf..f4216bd01a074 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4865,12 +4865,6 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
-def HLSLLength : LangBuiltin<"HLSL_LANG"> {
-  let Spellings = ["__builtin_hlsl_length"];
-  let Attributes = [NoThrow, Const];
-  let Prototype = "void(...)";
-}
-
 def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_lerp"];
   let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ca03fb665d423..2b09197669996 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19334,20 +19334,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
         ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
   }
-  case Builtin::BI__builtin_hlsl_length: {
-    Value *X = EmitScalarExpr(E->getArg(0));
-
-    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
-           "length operand must have a float representation");
-    // if the operand is a scalar, we can use the fabs llvm intrinsic directly
-    if (!E->getArg(0)->getType()->isVectorType())
-      return EmitFAbs(*this, X);
-
-    return Builder.CreateIntrinsic(
-        /*ReturnType=*/X->getType()->getScalarType(),
-        CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
-        nullptr, "hlsl.length");
-  }
   case Builtin::BI__builtin_hlsl_normalize: {
     Value *X = EmitScalarExpr(E->getArg(0));
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 46e472f0aae21..00e110e8e6fa2 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -77,7 +77,6 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 8d5fd94133153..392075d276b18 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -13,6 +13,14 @@ namespace hlsl {
 
 namespace __detail {
 
+template <typename T, typename U> struct is_same {
+  static const bool value = false;
+};
+
+template <typename T> struct is_same<T, T> {
+  static const bool value = true;
+};
+
 template <bool B, typename T> struct enable_if {};
 
 template <typename T> struct enable_if<true, T> {
@@ -33,6 +41,21 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
   return __builtin_bit_cast(U, F);
 }
 
+template <typename T>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+length_impl(T X) {
+  return __builtin_elementwise_abs(X);
+}
+
+template <typename T, int N>
+enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+length_vec_impl(vector<T, N> X) {
+  vector<T, N> XSquared = X * X;
+  T XSquaredSum = XSquared[0];
+  [unroll] for (int i = 1; i < N; ++i) XSquaredSum += XSquared[i];
+  return __builtin_elementwise_sqrt(XSquaredSum);
+}
+
 } // namespace __detail
 } // namespace hlsl
 #endif //_HLSL_HLSL_DETAILS_H_
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index b745997f1d5a2..cf287e598f76b 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1297,27 +1297,16 @@ float4 lerp(float4, float4, float4);
 ///
 /// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...).
 
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half2);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half3);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half4);
+const inline half length(half X) { return __detail::length_impl(X); }
+const inline float length(float X) { return __detail::length_impl(X); }
 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float4);
+template <int N> const inline half length(vector<half, N> X) {
+  return __detail::length_vec_impl(X);
+}
+
+template <int N> const inline float length(vector<float, N> X) {
+  return __detail::length_vec_impl(X);
+}
 
 //===----------------------------------------------------------------------===//
 // log builtins
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 600c800029fd0..64b6fe4cd5eb4 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2100,24 +2100,6 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
-  case Builtin::BI__builtin_hlsl_length: {
-    if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
-      return true;
-    if (SemaRef.checkArgCount(TheCall, 1))
-      return true;
-
-    ExprResult A = TheCall->getArg(0);
-    QualType ArgTyA = A.get()->getType();
-    QualType RetTy;
-
-    if (auto *VTy = ArgTyA->getAs<VectorType>())
-      RetTy = VTy->getElementType();
-    else
-      RetTy = TheCall->getArg(0)->getType();
-
-    TheCall->setType(RetTy);
-    break;
-  }
   case Builtin::BI__builtin_hlsl_mad: {
     if (SemaRef.checkArgCount(TheCall, 3))
       return true;
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index a24f01d275440..ecf2edcf0b05f 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -1,73 +1,130 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-
-// NATIVE_HALF: define noundef nofpclass(nan inf) half @
-// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.fabs.f16(half
-// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float
-// NATIVE_HALF: ret half
-// NO_HALF: ret float
-half test_length_half(half p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef nofpclass(nan inf) half @
-// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v2f16
-// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half2(half2 p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef nofpclass(nan inf) half @
-// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v3f16
-// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half3(half3 p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef nofpclass(nan inf) half @
-// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v4f16
-// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half4(half4 p0)
-{
-  return length(p0);
-}
-
-// CHECK: define noundef nofpclass(nan inf) float @
-// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float
-// CHECK: ret float
-float test_length_float(float p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef nofpclass(nan inf) float @
-// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32(
-// CHECK: ret float %hlsl.length
-float test_length_float2(float2 p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef nofpclass(nan inf) float @
-// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32(
-// CHECK: ret float %hlsl.length
-float test_length_float3(float3 p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef nofpclass(nan inf) float @
-// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32(
-// CHECK: ret float %hlsl.length
-float test_length_float4(float4 p0)
-{
-  return length(p0);
-}
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -O1 -o - | FileCheck %s
+
+// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z16test_length_halfDh(
+// CHECK-SAME: half noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.fabs.f16(half [[P0]])
+// CHECK-NEXT:    ret half [[ELT_ABS_I]]
+//
+half test_length_half(half p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh(
+// CHECK-SAME: <2 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x half> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+half test_length_half2(half2 p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh(
+// CHECK-SAME: <3 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x half> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]]
+// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
+// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_1]])
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+half test_length_half3(half3 p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh(
+// CHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x half> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]]
+// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
+// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]]
+// CHECK-NEXT:    [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
+// CHECK-NEXT:    [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_2]], [[ADD_I_1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_2]])
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+half test_length_half4(half4 p0)
+{
+  return length(p0);
+}
+
+
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z17test_length_floatf(
+// CHECK-SAME: float noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.fabs.f32(float [[P0]])
+// CHECK-NEXT:    ret float [[ELT_ABS_I]]
+//
+float test_length_float(float p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f(
+// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x float> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_length_float2(float2 p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f(
+// CHECK-SAME: <3 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x float> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]]
+// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
+// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_1]])
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_length_float3(float3 p0)
+{
+  return length(p0);
+}
+
+
+// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f(
+// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x float> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]]
+// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
+// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]]
+// CHECK-NEXT:    [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
+// CHECK-NEXT:    [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_2]], [[ADD_I_1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_2]])
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_length_float4(float4 p0)
+{
+  return length(p0);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
index 281faada6f5e9..a191f0419fbba 100644
--- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
@@ -1,32 +1,53 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
-
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
 
 void test_too_few_arg()
 {
-  return __builtin_hlsl_length();
-  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+  return length();
+  // expected-error@-1 {{no matching function for call to 'length'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}}
 }
 
 void test_too_many_arg(float2 p0)
 {
-  return __builtin_hlsl_length(p0, p0);
-  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+  return length(p0, p0);
+  // expected-error@-1 {{no matching function for call to 'length'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}}
+}
+
+float double_to_float_type(double p0) {
+  return length(p0);
+  // expected-error@-1  {{call to 'length' is ambiguous}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
 }
 
-bool builtin_bool_to_float_type_promotion(bool p1)
+
+float bool_to_float_type_promotion(bool p1)
 {
-  return __builtin_hlsl_length(p1);
-  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
+  return length(p1);
+  // expected-error@-1  {{call to 'length' is ambiguous}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
 }
 
-bool builtin_length_int_to_float_promotion(int p1)
+float length_int_to_float_promotion(int p1)
 {
-  return __builtin_hlsl_length(p1);
-  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+  return length(p1);
+  // expected-error@-1  {{call to 'length' is ambiguous}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
 }
 
-bool2 builtin_length_int2_to_float2_promotion(int2 p1)
+float2 length_int2_to_float2_promotion(int2 p1)
 {
-  return __builtin_hlsl_length(p1);
-  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+  return length(p1);
+  // expected-error@-1  {{call to 'length' is ambiguous}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 3b1d1a88e01a8..ef48af5b42dbf 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -93,7 +93,6 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
 def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
     [IntrNoMem]>;
 
-def int_dx_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
 def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 51dc3025f0c37..cf142806bb1df 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -57,7 +57,6 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_nclamp:
   case Intrinsic::dx_degrees:
   case Intrinsic::dx_lerp:
-  case Intrinsic::dx_length:
   case Intrinsic::dx_normalize:
   case Intrinsic::dx_fdot:
   case Intrinsic::dx_sdot:
@@ -292,32 +291,6 @@ static Value *expandAnyOrAllIntrinsic(CallInst *Orig,
   return Result;
 }
 
-static Value *expandLengthIntrinsic(CallInst *Orig) {
-  Value *X = Orig->getOperand(0);
-  IRBuilder<> Builder(Orig);
-  Type *Ty = X->getType();
-  Type *EltTy = Ty->getScalarType();
-
-  // Though dx.length does work on scalar type, we can optimize it to just emit
-  // fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because
-  // CGBuiltin.cpp should have emitted a fabs call.
-  Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0);
-  auto *XVec = dyn_cast<FixedVectorType>(Ty);
-  unsigned XVecSize = XVec->getNumElements();
-  if (!(Ty->isVectorTy() && XVecSize > 1))
-    report_fatal_error(Twine("Invalid input type for length intrinsic"),
-                       /* gen_crash_diag=*/false);
-
-  Value *Sum = Builder.CreateFMul(Elt, Elt);
-  for (unsigned I = 1; I < XVecSize; I++) {
-    Elt = Builder.CreateExtractElement(X, I);
-    Value *Mul = Builder.CreateFMul(Elt, Elt);
-    Sum = Builder.CreateFAdd(Sum, Mul);
-  }
-  return Builder.CreateIntrinsic(EltTy, Intrinsic::sqrt, ArrayRef<Value *>{Sum},
-                                 nullptr, "elt.sqrt");
-}
-
 static Value *expandLerpIntrinsic(CallInst *Orig) {
   Value *X = Orig->getOperand(0);
   Value *Y = Orig->getOperand(1);
@@ -589,9 +562,6 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_lerp:
     Result = expandLerpIntrinsic(Orig);
     break;
-  case Intrinsic::dx_length:
-    Result = expandLengthIntrinsic(Orig);
-    break;
   case Intrinsic::dx_normalize:
     Result = expandNormalizeIntrinsic(Orig);
     break;
diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll
deleted file mode 100644
index fc5868a7f6e82..0000000000000
--- a/llvm/test/CodeGen/DirectX/length.ll
+++ /dev/null
@@ -1,116 +0,0 @@
-; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
-; RUN: opt -S  -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
-
-; Make sure dxil operation function calls for length are generated for half/float.
-
-declare half @llvm.fabs.f16(half)
-declare half @llvm.dx.length.v2f16(<2 x half>)
-declare half @llvm.dx.length.v3f16(<3 x half>)
-declare half @llvm.dx.length.v4f16(<4 x half>)
-
-declare float @llvm.fabs.f32(float)
-declare float @llvm.dx.length.v2f32(<2 x float>)
-declare float @llvm.dx.length.v3f32(<3 x float>)
-declare float @llvm.dx.length.v4f32(<4 x float>)
-
-define noundef half @test_length_half2(<2 x half> noundef %p0) {
-entry:
-  ; CHECK: extractelement <2 x half> %{{.*}}, i64 0
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <2 x half> %{{.*}}, i64 1
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
-  ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
-
-  %hlsl.length = call half @llvm.dx.length.v2f16(<2 x half> %p0)
-  ret half %hlsl.length
-}
-
-define noundef half @test_length_half3(<3 x half> noundef %p0) {
-entry:
-  ; CHECK: extractelement <3 x half> %{{.*}}, i64 0
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <3 x half> %{{.*}}, i64 1
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <3 x half> %{{.*}}, i64 2
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
-  ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
-
-  %hlsl.length = call half @llvm.dx.length.v3f16(<3 x half> %p0)
-  ret half %hlsl.length
-}
-
-define noundef half @test_length_half4(<4 x half> noundef %p0) {
-entry:
-  ; CHECK: extractelement <4 x half> %{{.*}}, i64 0
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x half> %{{.*}}, i64 1
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x half> %{{.*}}, i64 2
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x half> %{{.*}}, i64 3
-  ; CHECK: fmul half %{{.*}}, %{{.*}}
-  ; CHECK: fadd half %{{.*}}, %{{.*}}
-  ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
-  ; DOPCHECK:  call half @dx.op.unary.f16(i32 24, half %{{.*}})
-
-  %hlsl.length = call half @llvm.dx.length.v4f16(<4 x half> %p0)
-  ret half %hlsl.length
-}
-
-define noundef float @test_length_float2(<2 x float> noundef %p0) {
-entry:
-  ; CHECK: extractelement <2 x float> %{{.*}}, i64 0
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <2 x float> %{{.*}}, i64 1
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
-  ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
-
-  %hlsl.length = call float @llvm.dx.length.v2f32(<2 x float> %p0)
-  ret float %hlsl.length
-}
-
-define noundef float @test_length_float3(<3 x float> noundef %p0) {
-entry:
-  ; CHECK: extractelement <3 x float> %{{.*}}, i64 0
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <3 x float> %{{.*}}, i64 1
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <3 x float> %{{.*}}, i64 2
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
-  ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
-
-  %hlsl.length = call float @llvm.dx.length.v3f32(<3 x float> %p0)
-  ret float %hlsl.length
-}
-
-define noundef float @test_length_float4(<4 x float> noundef %p0) {
-entry:
-  ; CHECK: extractelement <4 x float> %{{.*}}, i64 0
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x float> %{{.*}}, i64 1
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x float> %{{.*}}, i64 2
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; CHECK: extractelement <4 x float> %{{.*}}, i64 3
-  ; CHECK: fmul float %{{.*}}, %{{.*}}
-  ; CHECK: fadd float %{{.*}}, %{{.*}}
-  ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
-  ; DOPCHECK:  call float @dx.op.unary.f32(i32 24, float %{{.*}})
-
-  %hlsl.length = call float @llvm.dx.length.v4f32(<4 x float> %p0)
-  ret float %hlsl.length
-}
diff --git a/llvm/test/CodeGen/DirectX/length_error.ll b/llvm/test/CodeGen/DirectX/length_error.ll
deleted file mode 100644
index 143b41fc506e1..0000000000000
--- a/llvm/test/CodeGen/DirectX/length_error.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation length does not support double overload type
-; CHECK: Cannot create Sqrt operation: Invalid overload type
-
-define noundef double @test_length_double2(<2 x double> noundef %p0) {
-entry:
-  %hlsl.length = call double @llvm.dx.length.v2f32(<2 x double> %p0)
-  ret double %hlsl.length
-}
diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
deleted file mode 100644
index f722de2f9029e..0000000000000
--- a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation length does not support 1-element vector types.
-; CHECK: LLVM ERROR: Invalid input type for length intrinsic
-
-define noundef float @test_length_float(<1 x float> noundef %p0) {
-entry:
-  %hlsl.length = call float @llvm.dx.length.v1f32(<1 x float> %p0)
-  ret float %hlsl.length
-}
diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll
deleted file mode 100644
index ac3a0513eb6b2..0000000000000
--- a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation length does not support scalar types
-; CHECK: error: invalid intrinsic signature
-
-define noundef float @test_length_float(float noundef %p0) {
-entry:
-  %hlsl.length = call float @llvm.dx.length.f32(float %p0)
-  ret float %hlsl.length
-}

From b6cda338ab7bd18265a3e3fd494856620aec5210 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 9 Jan 2025 18:19:39 +0000
Subject: [PATCH 22/79] [Loads] Also consider getPointerAlignment when checking
 assumptions. (#120916)

Also use getPointerAlignment when trying to use alignment and
dereferenceable assumptions. This catches cases where dereferencable is
known via the assumption but alignment is known via getPointerAlignment
(e.g. via argument attribute or align of 1)

PR: https://github.com/llvm/llvm-project/pull/120916
---
 llvm/lib/Analysis/Loads.cpp                   |  6 ++-
 ...able-info-from-assumption-constant-size.ll | 50 +++----------------
 2 files changed, 12 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index bc03e4052a705..7bbd469bd035d 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -170,9 +170,10 @@ static bool isDereferenceableAndAlignedPointer(
 
   if (CtxI) {
     /// Look through assumes to see if both dereferencability and alignment can
-    /// be provent by an assume
+    /// be proven by an assume if needed.
     RetainedKnowledge AlignRK;
     RetainedKnowledge DerefRK;
+    bool IsAligned = V->getPointerAlignment(DL) >= Alignment;
     if (getKnowledgeForValue(
             V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
             [&](RetainedKnowledge RK, Instruction *Assume, auto) {
@@ -182,7 +183,8 @@ static bool isDereferenceableAndAlignedPointer(
                 AlignRK = std::max(AlignRK, RK);
               if (RK.AttrKind == Attribute::Dereferenceable)
                 DerefRK = std::max(DerefRK, RK);
-              if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
+              IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value();
+              if (IsAligned && DerefRK &&
                   DerefRK.ArgValue >= Size.getZExtValue())
                 return true; // We have found what we needed so we stop looking
               return false;  // Other assumes may have better information. so
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index 1333451a0077e..815e6bce52c0a 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -104,8 +104,8 @@ exit:
   ret void
 }
 
-define void @deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
-; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(
+define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; CHECK-LABEL: define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(
 ; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4) ]
@@ -899,32 +899,15 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noali
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK:       [[PRED_LOAD_IF]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 1
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE]]
-; CHECK:       [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT:    [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK:       [[PRED_LOAD_IF1]]:
-; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
-; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK:       [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT:    [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
@@ -1168,32 +1151,15 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK:       [[PRED_LOAD_IF]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE]]
-; CHECK:       [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT:    [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK:       [[PRED_LOAD_IF1]]:
-; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
-; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK:       [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT:    [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0

From 6beaa4552686685e93e31dcb6275e18c4c17acd9 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Thu, 9 Jan 2025 13:25:10 -0500
Subject: [PATCH 23/79] [RISCV][VLOPT] Fix VCFCT incompatible EEW test
 (#122327)

https://github.com/llvm/llvm-project/pull/122151 added this test with an
invalid SEW. Use a valid SEW here.
---
 llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index 2359fae9389d0..389a7ff1fdbc3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -1423,9 +1423,9 @@ name: vfred_vs1_vs2_incompatible_eew
 body: |
   bb.0:
     ; CHECK-LABEL: name: vfred_vs1_vs2_incompatible_eew
-    ; CHECK: %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK: %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 6 /* e64 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0 /* tu, mu */
-    %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 3 /* e8 */, 0
+    %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 6 /* e64 */, 0
     %y:vr = PseudoVFREDMAX_VS_M1_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0
 ...
 ---

From cbcb7ad32e6faca1f4c0f2f436e6076774104e17 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <razvan.lupusoru@gmail.com>
Date: Thu, 9 Jan 2025 10:27:37 -0800
Subject: [PATCH 24/79] [mlir][acc] Introduce MappableType interface (#122146)

OpenACC data clause operations previously required that the variable
operand implemented PointerLikeType interface. This was a reasonable
constraint because the dialects currently mixed with `acc` do use
pointers to represent variables. However, this forces the "pointer"
abstraction to be exposed too early and some cases are not cleanly
representable through this approach (more specifically FIR's `fix.box`
abstraction).

Thus, relax this by allowing a variable to be a type which implements
either `PointerLikeType` interface or `MappableType` interface.
---
 mlir/docs/Dialects/OpenACCDialect.md          |  98 +++++-
 mlir/include/mlir/Dialect/OpenACC/OpenACC.h   |  44 ++-
 .../mlir/Dialect/OpenACC/OpenACCOps.td        | 284 +++++++++++-------
 .../Dialect/OpenACC/OpenACCTypeInterfaces.td  |  93 ++++++
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       | 220 +++++++++++++-
 .../Dialect/OpenACC/OpenACCOpsTest.cpp        | 128 ++++++--
 6 files changed, 696 insertions(+), 171 deletions(-)

diff --git a/mlir/docs/Dialects/OpenACCDialect.md b/mlir/docs/Dialects/OpenACCDialect.md
index 2f1bb194a167d..9e85d66ba340f 100755
--- a/mlir/docs/Dialects/OpenACCDialect.md
+++ b/mlir/docs/Dialects/OpenACCDialect.md
@@ -274,28 +274,96 @@ reference counters are zero, a delete action is performed.
 
 ### Types
 
-There are a few acc dialect type categories to describe:
-* type of acc data clause operation input `varPtr`
-	- The type of `varPtr` must be pointer-like. This is done by
-	attaching the `PointerLikeType` interface to the appropriate MLIR
-	type. Although memory/storage concept is a lower level abstraction,
-	it is useful because the OpenACC model distinguishes between host
-	and device memory explicitly - and the mapping between the two is
-	done through pointers. Thus, by explicitly requiring it in the
-	dialect, the appropriate language frontend must create storage or
-	use type that satisfies the mapping constraint.
+Since the `acc dialect` is meant to be used alongside other dialects which
+represent the source language, appropriate use of types and type interfaces is
+key to ensuring compatibility. This section describes those considerations.
+
+#### Data Clause Operation Types
+
+Data clause operations (eg. `acc.copyin`) rely on the following type
+considerations:
+* type of acc data clause operation input `var`
+	- The type of `var` must be one with `PointerLikeType` or `MappableType`
+	interfaces attached. The first, `PointerLikeType`, is useful because
+	the OpenACC memory model distinguishes between host and device memory
+	explicitly - and the mapping between the two is	done through pointers. Thus,
+	by explicitly requiring it in the dialect, the appropriate language
+	frontend must create storage or	use type that satisfies the mapping
+	constraint. The second possibility, `MappableType` was added because
+	memory/storage concept is a lower level abstraction and not all dialects
+	choose to use a pointer abstraction especially in the case where semantics
+	are more complex (such as `fir.box` which represents Fortran descriptors
+	and is defined in the `fir` dialect used from `flang`).
 * type of result of acc data clause operations
 	- The type of the acc data clause operation is exactly the same as
-	`varPtr`. This was done intentionally instead of introducing an
-	`acc.ref/ptr` type so that IR compatibility and the dialect's
+	`var`. This was done intentionally instead of introducing specific `acc`
+	output types so that so that IR compatibility and the dialect's
 	existing strong type checking can be maintained. This is needed
 	since the `acc` dialect must live within another dialect whose type
-	system is unknown to it. The only constraint is that the appropriate
-	dialect type must use the `PointerLikeType` interface.
+	system is unknown to it.
+* variable type captured in `varType`
+	- When `var`'s type is `PointerLikeType`, the actual type of the target
+	may be lost. More specifically, dialects like `llvm` which use opaque
+	pointers, do not record the target variable's type. The use of this field
+	bridges this gap.
 * type of decomposed clauses
 	- Decomposed clauses, such as `acc.bounds` and `acc.declare_enter`
 	produce types to allow their results to be used only in specific
-	operations.
+	operations. These are synthetic types solely used for proper IR
+	construction.
+
+#### Pointer-Like Requirement
+
+The need to have pointer-type requirement in the acc dialect stems from
+a few different aspects:
+- Existing dialects like `hlfir`, `fir`, `cir`, `llvm` use a pointer
+representation for variables.
+- Reference counters (for data clauses) are described in terms of
+memory. In OpenACC spec 3.3 in section 2.6.7. It says: "A structured reference
+counter is incremented when entering each data or compute region that contain an
+explicit data clause or implicitly-determined data attributes for that section
+of memory". This implies addressability of memory.
+- Attach semantics (2.6.8 attachment counter) are specified using
+"address" terminology: "The attachment counter for a pointer is set to
+one whenever the pointer is attached to new target address, and
+incremented whenever an attach action for that pointer is performed for
+the same target address.
+
+#### Type Interfaces
+
+The `acc` dialect describes two different type interfaces which must be
+implemented and attached to the source dialect's types in order to allow use
+of data clause operations (eg. `acc.copyin`). They are as follows:
+* `PointerLikeType`
+  - The idea behind this interface is that variables end up being represented
+  as pointers in many dialects. More specifically, `fir`, `cir`, `llvm`
+  represent user declared local variables with some dialect specific form of
+  `alloca` operation which produce pointers. Globals, similarly, are referred by
+  their address through some form of `address_of` operation. Additionally, an
+  implementation for OpenACC runtime needs to distinguish between device and
+  host memory - also typically done by talking about pointers. So this type
+  interface requirement fits in naturally with OpenACC specification. Data
+  mapping operation semantics can often be simply described by a pointer and
+  size of the data it points to.
+* `MappableType`
+   - This interface was introduced because the `PointerLikeType` requirement
+  cannot represent cases when the source dialect does not use pointers. Also,
+  some cases, such as Fortran descriptor-backed arrays and Fortran optional
+  arguments, require decomposition into multiple steps. For example, in the
+  descriptor case, mapping of descriptor is needed, mapping of the data, and
+  implicit attach into device descriptor. In order to allow capturing all of
+  this complexity with a single data clause operation, the `MappableType`
+  interface was introduced. This is consistent with the dialect's goals
+  including being "able to regenerate the semantic equivalent of the user
+  pragmas".
+
+The intent is that a dialect's type system implements one of these two
+interfaces. And to be precise, a type should only implement one or the other
+(and not both) - since keeping them separate avoids ambiguity on what actually
+needs mapped. When `var` is `PointerLikeType`, the assumption is that the data
+pointed-to will be mapped. If the pointer-like type also implemented
+`MappableType` interface, it becomes ambiguous whether the data pointed to or
+the pointer itself is being mapped.
 
 ### Recipes
 
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
index cda07d6a91364..748cb7f28fc8c 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
@@ -25,6 +25,7 @@
 #include "mlir/Dialect/OpenACC/OpenACCOpsInterfaces.h.inc"
 #include "mlir/Dialect/OpenACC/OpenACCTypeInterfaces.h.inc"
 #include "mlir/Dialect/OpenACCMPCommon/Interfaces/AtomicInterfaces.h"
+#include "mlir/IR/Value.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
@@ -83,16 +84,31 @@ namespace acc {
 /// combined and the final mapping value would be 5 (4 | 1).
 enum OpenACCExecMapping { NONE = 0, VECTOR = 1, WORKER = 2, GANG = 4 };
 
-/// Used to obtain the `varPtr` from a data clause operation.
+/// Used to obtain the `var` from a data clause operation.
 /// Returns empty value if not a data clause operation or is a data exit
-/// operation with no `varPtr`.
-mlir::Value getVarPtr(mlir::Operation *accDataClauseOp);
-
-/// Used to obtain the `accPtr` from a data clause operation.
-/// When a data entry operation, it obtains its result `accPtr` value.
-/// If a data exit operation, it obtains its operand `accPtr` value.
+/// operation with no `var`.
+mlir::Value getVar(mlir::Operation *accDataClauseOp);
+
+/// Used to obtain the `var` from a data clause operation if it implements
+/// `PointerLikeType`.
+mlir::TypedValue<mlir::acc::PointerLikeType>
+getVarPtr(mlir::Operation *accDataClauseOp);
+
+/// Used to obtains the `varType` from a data clause operation which records
+/// the type of variable. When `var` is `PointerLikeType`, this returns
+/// the type of the pointer target.
+mlir::Type getVarType(mlir::Operation *accDataClauseOp);
+
+/// Used to obtain the `accVar` from a data clause operation.
+/// When a data entry operation, it obtains its result `accVar` value.
+/// If a data exit operation, it obtains its operand `accVar` value.
 /// Returns empty value if not a data clause operation.
-mlir::Value getAccPtr(mlir::Operation *accDataClauseOp);
+mlir::Value getAccVar(mlir::Operation *accDataClauseOp);
+
+/// Used to obtain the `accVar` from a data clause operation if it implements
+/// `PointerLikeType`.
+mlir::TypedValue<mlir::acc::PointerLikeType>
+getAccPtr(mlir::Operation *accDataClauseOp);
 
 /// Used to obtain the `varPtrPtr` from a data clause operation.
 /// Returns empty value if not a data clause operation.
@@ -136,6 +152,18 @@ mlir::ValueRange getDataOperands(mlir::Operation *accOp);
 /// Used to get a mutable range iterating over the data operands.
 mlir::MutableOperandRange getMutableDataOperands(mlir::Operation *accOp);
 
+/// Used to check whether the provided `type` implements the `PointerLikeType`
+/// interface.
+inline bool isPointerLikeType(mlir::Type type) {
+  return mlir::isa<mlir::acc::PointerLikeType>(type);
+}
+
+/// Used to check whether the provided `type` implements the `MappableType`
+/// interface.
+inline bool isMappableType(mlir::Type type) {
+  return mlir::isa<mlir::acc::MappableType>(type);
+}
+
 /// Used to obtain the attribute name for declare.
 static constexpr StringLiteral getDeclareAttrName() {
   return StringLiteral("acc.declare");
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 3ac265ac68756..a47f70b168066 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -70,7 +70,14 @@ def IntOrIndex : AnyTypeOf<[AnyInteger, Index]>;
 
 // Simple alias to pointer-like interface to reduce verbosity.
 def OpenACC_PointerLikeType : TypeAlias<OpenACC_PointerLikeTypeInterface,
-	"pointer-like type">;
+    "pointer-like type">;
+def OpenACC_MappableType : TypeAlias<OpenACC_MappableTypeInterface,
+    "mappable type">;
+
+def OpenACC_AnyPointerOrMappableLike : TypeConstraint<Or<[OpenACC_PointerLikeType.predicate,
+    OpenACC_MappableType.predicate]>, "any pointer or mappable">;
+def OpenACC_AnyPointerOrMappableType : Type<OpenACC_AnyPointerOrMappableLike.predicate,
+    "any pointer or mappable">;
 
 // Define the OpenACC data clauses. There are a few cases where a modifier
 // is used, like create(zero), copyin(readonly), and copyout(zero). Since in
@@ -353,7 +360,8 @@ def OpenACC_DataBoundsOp : OpenACC_Op<"bounds",
         build($_builder, $_state,
           ::mlir::acc::DataBoundsType::get($_builder.getContext()),
           /*lowerbound=*/{}, /*upperbound=*/{}, extent,
-          /*stride=*/{}, /*strideInBytes=*/nullptr, /*startIdx=*/{});
+          /*stride=*/{}, /*strideInBytes=*/$_builder.getBoolAttr(false),
+          /*startIdx=*/{});
       }]
     >,
     OpBuilder<(ins "::mlir::Value":$lowerbound,
@@ -361,7 +369,8 @@ def OpenACC_DataBoundsOp : OpenACC_Op<"bounds",
         build($_builder, $_state,
           ::mlir::acc::DataBoundsType::get($_builder.getContext()),
           lowerbound, upperbound, /*extent=*/{},
-          /*stride=*/{}, /*strideInBytes=*/nullptr, /*startIdx=*/{});
+          /*stride=*/{}, /*strideInBytes=*/$_builder.getBoolAttr(false),
+          /*startIdx=*/{});
       }]
     >
   ];
@@ -396,10 +405,15 @@ class OpenACC_DataEntryOp<string mnemonic, string clause, string extraDescriptio
 
   let description = !strconcat(extraDescription, [{
     Description of arguments:
-    - `varPtr`: The address of variable to copy.
-    - `varPtrPtr`: Specifies the address of varPtr - only used when the variable
-    copied is a field in a struct. This is important for OpenACC due to implicit
-    attach semantics on data clauses (2.6.4).
+    - `var`: The variable to copy. Must be either `MappableType` or
+    `PointerLikeType`.
+    - `varType`: The type of the variable that is being copied. When `var` is
+    a `MappableType`, this matches the type of `var`. When `var` is a
+    `PointerLikeType`, this type holds information about the target of the
+    pointer.
+    - `varPtrPtr`: Specifies the address of the address of `var` - only used
+    when the variable copied is a field in a struct. This is important for
+    OpenACC due to implicit attach semantics on data clauses (2.6.4).
     - `bounds`: Used when copying just slice of array or array's bounds are not
     encoded in type. They are in rank order where rank 0 is inner-most dimension.
     - `asyncOperands` and `asyncOperandsDeviceType`:
@@ -456,42 +470,74 @@ class OpenACC_DataEntryOp<string mnemonic, string clause, string extraDescriptio
       }
       return nullptr;
     }
+    mlir::TypedValue<mlir::acc::PointerLikeType> getVarPtr() {
+      return mlir::dyn_cast<mlir::TypedValue<mlir::acc::PointerLikeType>>(getVar());
+    }
+    mlir::TypedValue<mlir::acc::PointerLikeType> getAccPtr() {
+      return mlir::dyn_cast<mlir::TypedValue<mlir::acc::PointerLikeType>>(getAccVar());
+    }
   }];
 
   let assemblyFormat = [{
-    `varPtr` `(` $varPtr `:` custom<VarPtrType>(type($varPtr), $varType)
+    custom<Var>($var) `:` custom<VarPtrType>(type($var), $varType)
     oilist(
         `varPtrPtr` `(` $varPtrPtr `:` type($varPtrPtr) `)`
       | `bounds` `(` $bounds `)`
       | `async` `(` custom<DeviceTypeOperands>($asyncOperands,
             type($asyncOperands), $asyncOperandsDeviceType) `)`
-    ) `->` type($accPtr) attr-dict
+    ) `->` type($accVar) attr-dict
   }];
 
   let hasVerifier = 1;
 
-  let builders = [OpBuilder<(ins "::mlir::Value":$varPtr, "bool":$structured,
-                                "bool":$implicit,
-                                CArg<"::mlir::ValueRange", "{}">:$bounds),
-                            [{
+  let builders = [
+    OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr,
+                   "bool":$structured, "bool":$implicit,
+                   CArg<"::mlir::ValueRange", "{}">:$bounds),
+      [{
         build($_builder, $_state, varPtr.getType(), varPtr,
           /*varType=*/::mlir::TypeAttr::get(
-            ::mlir::cast<::mlir::acc::PointerLikeType>(
-              varPtr.getType()).getElementType()),
+            varPtr.getType().getElementType()),
           /*varPtrPtr=*/{}, bounds, /*asyncOperands=*/{},
           /*asyncOperandsDeviceType=*/nullptr,
           /*asyncOnly=*/nullptr, /*dataClause=*/nullptr,
           /*structured=*/$_builder.getBoolAttr(structured),
           /*implicit=*/$_builder.getBoolAttr(implicit), /*name=*/nullptr);
       }]>,
-                  OpBuilder<(ins "::mlir::Value":$varPtr, "bool":$structured,
-                                "bool":$implicit, "const ::llvm::Twine &":$name,
-                                CArg<"::mlir::ValueRange", "{}">:$bounds),
-                            [{
+    OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr,
+                   "bool":$structured, "bool":$implicit,
+                   "const ::llvm::Twine &":$name,
+                   CArg<"::mlir::ValueRange", "{}">:$bounds),
+      [{
         build($_builder, $_state, varPtr.getType(), varPtr,
           /*varType=*/::mlir::TypeAttr::get(
-            ::mlir::cast<::mlir::acc::PointerLikeType>(
-              varPtr.getType()).getElementType()),
+            varPtr.getType().getElementType()),
+          /*varPtrPtr=*/{}, bounds, /*asyncOperands=*/{},
+          /*asyncOperandsDeviceType=*/nullptr,
+          /*asyncOnly=*/nullptr, /*dataClause=*/nullptr,
+          /*structured=*/$_builder.getBoolAttr(structured),
+          /*implicit=*/$_builder.getBoolAttr(implicit),
+          /*name=*/$_builder.getStringAttr(name));
+      }]>,
+    OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::MappableType>":$var,
+                   "bool":$structured, "bool":$implicit,
+                   CArg<"::mlir::ValueRange", "{}">:$bounds),
+      [{
+        build($_builder, $_state, var.getType(), var,
+          /*varType=*/::mlir::TypeAttr::get(var.getType()),
+          /*varPtrPtr=*/{}, bounds, /*asyncOperands=*/{},
+          /*asyncOperandsDeviceType=*/nullptr,
+          /*asyncOnly=*/nullptr, /*dataClause=*/nullptr,
+          /*structured=*/$_builder.getBoolAttr(structured),
+          /*implicit=*/$_builder.getBoolAttr(implicit), /*name=*/nullptr);
+      }]>,
+    OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::MappableType>":$var,
+                   "bool":$structured, "bool":$implicit,
+                   "const ::llvm::Twine &":$name,
+                  CArg<"::mlir::ValueRange", "{}">:$bounds),
+      [{
+        build($_builder, $_state, var.getType(), var,
+          /*varType=*/::mlir::TypeAttr::get(var.getType()),
           /*varPtrPtr=*/{}, bounds, /*asyncOperands=*/{},
           /*asyncOperandsDeviceType=*/nullptr,
           /*asyncOnly=*/nullptr, /*dataClause=*/nullptr,
@@ -506,10 +552,10 @@ class OpenACC_DataEntryOp<string mnemonic, string clause, string extraDescriptio
 //===----------------------------------------------------------------------===//
 def OpenACC_PrivateOp : OpenACC_DataEntryOp<"private",
     "mlir::acc::DataClause::acc_private", "", [],
-    (ins OpenACC_PointerLikeTypeInterface:$varPtr)> {
+    (ins OpenACC_AnyPointerOrMappableType:$var)> {
   let summary = "Represents private semantics for acc private clause.";
-  let results = (outs Arg<OpenACC_PointerLikeTypeInterface,
-                          "Address of device variable",[MemWrite]>:$accPtr);
+  let results = (outs Arg<OpenACC_AnyPointerOrMappableType,
+                          "Accelerator mapped variable",[MemWrite]>:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -518,11 +564,11 @@ def OpenACC_PrivateOp : OpenACC_DataEntryOp<"private",
 //===----------------------------------------------------------------------===//
 def OpenACC_FirstprivateOp : OpenACC_DataEntryOp<"firstprivate",
     "mlir::acc::DataClause::acc_firstprivate", "", [],
-    (ins Arg<OpenACC_PointerLikeTypeInterface,"Address of variable",[MemRead]>:$varPtr)> {
+    (ins Arg<OpenACC_AnyPointerOrMappableType,"Host variable",[MemRead]>:$var)> {
   let summary = "Represents firstprivate semantic for the acc firstprivate "
                 "clause.";
-  let results = (outs Arg<OpenACC_PointerLikeTypeInterface,
-                          "Address of device variable",[MemWrite]>:$accPtr);
+  let results = (outs Arg<OpenACC_AnyPointerOrMappableType,
+                          "Accelerator mapped variable",[MemWrite]>:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -531,10 +577,10 @@ def OpenACC_FirstprivateOp : OpenACC_DataEntryOp<"firstprivate",
 //===----------------------------------------------------------------------===//
 def OpenACC_ReductionOp : OpenACC_DataEntryOp<"reduction",
     "mlir::acc::DataClause::acc_reduction", "", [],
-    (ins Arg<OpenACC_PointerLikeTypeInterface,"Address of variable",[MemRead]>:$varPtr)> {
+    (ins Arg<OpenACC_AnyPointerOrMappableType,"Host variable",[MemRead]>:$var)> {
   let summary = "Represents reduction semantics for acc reduction clause.";
-  let results = (outs Arg<OpenACC_PointerLikeTypeInterface,
-                          "Address of device variable",[MemWrite]>:$accPtr);
+  let results = (outs Arg<OpenACC_AnyPointerOrMappableType,
+                          "Accelerator mapped variable",[MemWrite]>:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -544,9 +590,9 @@ def OpenACC_ReductionOp : OpenACC_DataEntryOp<"reduction",
 def OpenACC_DevicePtrOp : OpenACC_DataEntryOp<"deviceptr",
     "mlir::acc::DataClause::acc_deviceptr", "",
     [MemoryEffects<[MemRead<OpenACC_RuntimeCounters>]>],
-    (ins OpenACC_PointerLikeTypeInterface:$varPtr)> {
+    (ins OpenACC_AnyPointerOrMappableType:$var)> {
   let summary = "Specifies that the variable pointer is a device pointer.";
-  let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr);
+  let results = (outs OpenACC_AnyPointerOrMappableType:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -557,9 +603,9 @@ def OpenACC_PresentOp : OpenACC_DataEntryOp<"present",
     "mlir::acc::DataClause::acc_present", "",
     [MemoryEffects<[MemRead<OpenACC_RuntimeCounters>,
                     MemWrite<OpenACC_RuntimeCounters>]>],
-    (ins OpenACC_PointerLikeTypeInterface:$varPtr)> {
+    (ins OpenACC_AnyPointerOrMappableType:$var)> {
   let summary = "Specifies that the variable is already present on device.";
-  let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr);
+  let results = (outs OpenACC_AnyPointerOrMappableType:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -570,11 +616,11 @@ def OpenACC_CopyinOp : OpenACC_DataEntryOp<"copyin",
     "mlir::acc::DataClause::acc_copyin", "",
     [MemoryEffects<[MemRead<OpenACC_RuntimeCounters>,
                     MemWrite<OpenACC_RuntimeCounters>]>],
-    (ins Arg<OpenACC_PointerLikeTypeInterface,"Address of variable",[MemRead]>:$varPtr)> {
+    (ins Arg<OpenACC_AnyPointerOrMappableType,"Host variable",[MemRead]>:$var)> {
   let summary = "Represents copyin semantics for acc data clauses like acc "
                 "copyin and acc copy.";
-  let results = (outs Arg<OpenACC_PointerLikeTypeInterface,
-                          "Address of device variable",[MemWrite]>:$accPtr);
+  let results = (outs Arg<OpenACC_AnyPointerOrMappableType,
+                          "Accelerator mapped variable",[MemWrite]>:$accVar);
 
   let extraClassDeclaration = extraClassDeclarationBase # [{
     /// Check if this is a copyin with readonly modifier.
@@ -589,11 +635,11 @@ def OpenACC_CreateOp : OpenACC_DataEntryOp<"create",
     "mlir::acc::DataClause::acc_create", "",
     [MemoryEffects<[MemRead<OpenACC_RuntimeCounters>,
                     MemWrite<OpenACC_RuntimeCounters>]>],
-    (ins OpenACC_PointerLikeTypeInterface:$varPtr)> {
+    (ins OpenACC_AnyPointerOrMappableType:$var)> {
   let summary = "Represents create semantics for acc data clauses like acc "
                 "create and acc copyout.";
-  let results = (outs Arg<OpenACC_PointerLikeTypeInterface,
-                          "Address of device variable",[MemWrite]>:$accPtr);
+  let results = (outs Arg<OpenACC_AnyPointerOrMappableType,
+                          "Accelerator mapped variable",[MemWrite]>:$accVar);
 
   let extraClassDeclaration = extraClassDeclarationBase # [{
     /// Check if this is a create with zero modifier.
@@ -608,9 +654,9 @@ def OpenACC_NoCreateOp : OpenACC_DataEntryOp<"nocreate",
     "mlir::acc::DataClause::acc_no_create", "",
     [MemoryEffects<[MemRead<OpenACC_RuntimeCounters>,
                     MemWrite<OpenACC_RuntimeCounters>]>],
-    (ins OpenACC_PointerLikeTypeInterface:$varPtr)> {
+    (ins OpenACC_AnyPointerOrMappableType:$var)> {
   let summary = "Represents acc no_create semantics.";
-  let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr);
+  let results = (outs OpenACC_AnyPointerOrMappableType:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -621,11 +667,11 @@ def OpenACC_AttachOp : OpenACC_DataEntryOp<"attach",
     "mlir::acc::DataClause::acc_attach", "",
     [MemoryEffects<[MemRead<OpenACC_RuntimeCounters>,
                     MemWrite<OpenACC_RuntimeCounters>]>],
-    (ins Arg<OpenACC_PointerLikeTypeInterface,"Address of variable",[MemRead]>:$varPtr)> {
+    (ins Arg<OpenACC_AnyPointerOrMappableType,"Host variable",[MemRead]>:$var)> {
   let summary = "Represents acc attach semantics which updates a pointer in "
                 "device memory with the corresponding device address of the "
                 "pointee.";
-  let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr);
+  let results = (outs OpenACC_AnyPointerOrMappableType:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -645,9 +691,9 @@ def OpenACC_GetDevicePtrOp : OpenACC_DataEntryOp<"getdeviceptr",
       that is any of the valid `mlir::acc::DataClause` entries.
       \
     }], [MemoryEffects<[MemRead<OpenACC_RuntimeCounters>]>],
-    (ins OpenACC_PointerLikeTypeInterface:$varPtr)> {
+    (ins OpenACC_AnyPointerOrMappableType:$var)> {
   let summary = "Gets device address if variable exists on device.";
-  let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr);
+  let results = (outs OpenACC_AnyPointerOrMappableType:$accVar);
   let hasVerifier = 0;
   let extraClassDeclaration = extraClassDeclarationBase;
 }
@@ -657,10 +703,10 @@ def OpenACC_GetDevicePtrOp : OpenACC_DataEntryOp<"getdeviceptr",
 //===----------------------------------------------------------------------===//
 def OpenACC_UpdateDeviceOp : OpenACC_DataEntryOp<"update_device",
     "mlir::acc::DataClause::acc_update_device", "", [],
-    (ins Arg<OpenACC_PointerLikeTypeInterface,"Address of variable",[MemRead]>:$varPtr)> {
+    (ins Arg<OpenACC_AnyPointerOrMappableType,"Host variable",[MemRead]>:$var)> {
   let summary = "Represents acc update device semantics.";
-  let results = (outs Arg<OpenACC_PointerLikeTypeInterface,
-                          "Address of device variable",[MemWrite]>:$accPtr);
+  let results = (outs Arg<OpenACC_AnyPointerOrMappableType,
+                          "Accelerator mapped variable",[MemWrite]>:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -670,9 +716,9 @@ def OpenACC_UpdateDeviceOp : OpenACC_DataEntryOp<"update_device",
 def OpenACC_UseDeviceOp : OpenACC_DataEntryOp<"use_device",
     "mlir::acc::DataClause::acc_use_device", "",
     [MemoryEffects<[MemRead<OpenACC_RuntimeCounters>]>],
-    (ins OpenACC_PointerLikeTypeInterface:$varPtr)> {
+    (ins OpenACC_AnyPointerOrMappableType:$var)> {
   let summary = "Represents acc use_device semantics.";
-  let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr);
+  let results = (outs OpenACC_AnyPointerOrMappableType:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -682,9 +728,9 @@ def OpenACC_UseDeviceOp : OpenACC_DataEntryOp<"use_device",
 def OpenACC_DeclareDeviceResidentOp : OpenACC_DataEntryOp<"declare_device_resident",
     "mlir::acc::DataClause::acc_declare_device_resident", "",
     [MemoryEffects<[MemWrite<OpenACC_RuntimeCounters>]>],
-    (ins Arg<OpenACC_PointerLikeTypeInterface,"Address of variable",[MemRead]>:$varPtr)> {
+    (ins Arg<OpenACC_AnyPointerOrMappableType,"Host variable",[MemRead]>:$var)> {
   let summary = "Represents acc declare device_resident semantics.";
-  let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr);
+  let results = (outs OpenACC_AnyPointerOrMappableType:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -694,9 +740,9 @@ def OpenACC_DeclareDeviceResidentOp : OpenACC_DataEntryOp<"declare_device_reside
 def OpenACC_DeclareLinkOp : OpenACC_DataEntryOp<"declare_link",
     "mlir::acc::DataClause::acc_declare_link", "",
     [MemoryEffects<[MemWrite<OpenACC_RuntimeCounters>]>],
-    (ins Arg<OpenACC_PointerLikeTypeInterface,"Address of variable",[MemRead]>:$varPtr)> {
+    (ins Arg<OpenACC_AnyPointerOrMappableType,"Host variable",[MemRead]>:$var)> {
   let summary = "Represents acc declare link semantics.";
-  let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr);
+  let results = (outs OpenACC_AnyPointerOrMappableType:$accVar);
   let extraClassDeclaration = extraClassDeclarationBase;
 }
 
@@ -705,10 +751,10 @@ def OpenACC_DeclareLinkOp : OpenACC_DataEntryOp<"declare_link",
 //===----------------------------------------------------------------------===//
 def OpenACC_CacheOp : OpenACC_DataEntryOp<"cache",
     "mlir::acc::DataClause::acc_cache", "", [NoMemoryEffect],
-    (ins OpenACC_PointerLikeTypeInterface:$varPtr)> {
+    (ins OpenACC_AnyPointerOrMappableType:$var)> {
   let summary = "Represents the cache directive that is associated with a "
                 "loop.";
-  let results = (outs OpenACC_PointerLikeTypeInterface:$accPtr);
+  let results = (outs OpenACC_AnyPointerOrMappableType:$accVar);
 
   let extraClassDeclaration = extraClassDeclarationBase # [{
     /// Check if this is a cache with readonly modifier.
@@ -738,7 +784,7 @@ class OpenACC_DataExitOp<string mnemonic, string clause, string extraDescription
                        OptionalAttr<StrAttr>:$name));
 
   let description = !strconcat(extraDescription, [{
-    - `accPtr`: The acc address of variable. This is the link from the data-entry
+    - `accVar`: The acc variable. This is the link from the data-entry
     operation used.
     - `bounds`: Used when copying just slice of array or array's bounds are not
     encoded in type. They are in rank order where rank 0 is inner-most dimension.
@@ -808,57 +854,67 @@ class OpenACC_DataExitOpWithVarPtr<string mnemonic, string clause>
           [MemoryEffects<[MemRead<OpenACC_RuntimeCounters>,
                           MemWrite<OpenACC_RuntimeCounters>]>],
           (ins Arg<OpenACC_PointerLikeTypeInterface,
-                   "Address of device variable", [MemRead]>:$accPtr,
+                   "Accelerator mapped variable", [MemRead]>:$accVar,
               Arg<OpenACC_PointerLikeTypeInterface,
-                  "Address of variable", [MemWrite]>:$varPtr,
+                  "Host variable", [MemWrite]>:$var,
               TypeAttr:$varType)> {
   let assemblyFormat = [{
-    `accPtr` `(` $accPtr `:` type($accPtr) `)`
+    custom<AccVar>($accVar, type($accVar))
     (`bounds` `(` $bounds^ `)` )?
     (`async` `(` custom<DeviceTypeOperands>($asyncOperands,
             type($asyncOperands), $asyncOperandsDeviceType)^ `)`)?
-    `to` `varPtr` `(` $varPtr `:` custom<VarPtrType>(type($varPtr), $varType)
+    `to` custom<Var>($var) `:` custom<VarPtrType>(type($var), $varType)
     attr-dict
   }];
 
-  let builders = [OpBuilder<(ins "::mlir::Value":$accPtr,
-                                "::mlir::Value":$varPtr, "bool":$structured,
-                                "bool":$implicit,
-                                CArg<"::mlir::ValueRange", "{}">:$bounds),
-                            [{
+  let builders = [
+    OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$accPtr,
+                   "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr,
+                   "bool":$structured, "bool":$implicit,
+                   CArg<"::mlir::ValueRange", "{}">:$bounds),
+      [{
         build($_builder, $_state, accPtr, varPtr,
           /*varType=*/::mlir::TypeAttr::get(
-          ::mlir::cast<::mlir::acc::PointerLikeType>(
-              varPtr.getType()).getElementType()),
+            varPtr.getType().getElementType()),
           bounds, /*asyncOperands=*/{}, /*asyncOperandsDeviceType=*/nullptr,
           /*asyncOnly=*/nullptr, /*dataClause=*/nullptr,
           /*structured=*/$_builder.getBoolAttr(structured),
           /*implicit=*/$_builder.getBoolAttr(implicit), /*name=*/nullptr);
       }]>,
-                  OpBuilder<(ins "::mlir::Value":$accPtr,
-                                "::mlir::Value":$varPtr, "bool":$structured,
-                                "bool":$implicit, "const ::llvm::Twine &":$name,
-                                CArg<"::mlir::ValueRange", "{}">:$bounds),
-                            [{
+    OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$accPtr,
+                   "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr,
+                   "bool":$structured, "bool":$implicit,
+                   "const ::llvm::Twine &":$name,
+                   CArg<"::mlir::ValueRange", "{}">:$bounds),
+      [{
         build($_builder, $_state, accPtr, varPtr,
           /*varType=*/::mlir::TypeAttr::get(
-          ::mlir::cast<::mlir::acc::PointerLikeType>(
-              varPtr.getType()).getElementType()),
+            varPtr.getType().getElementType()),
           bounds, /*asyncOperands=*/{}, /*asyncOperandsDeviceType=*/nullptr,
           /*asyncOnly=*/nullptr, /*dataClause=*/nullptr,
           /*structured=*/$_builder.getBoolAttr(structured),
           /*implicit=*/$_builder.getBoolAttr(implicit),
           /*name=*/$_builder.getStringAttr(name));
       }]>];
+
+  code extraClassDeclarationDataExit = [{
+    mlir::TypedValue<mlir::acc::PointerLikeType> getVarPtr() {
+      return mlir::dyn_cast<mlir::TypedValue<mlir::acc::PointerLikeType>>(getVar());
+    }
+    mlir::TypedValue<mlir::acc::PointerLikeType> getAccPtr() {
+      return mlir::dyn_cast<mlir::TypedValue<mlir::acc::PointerLikeType>>(getAccVar());
+    }
+  }];
 }
 
 class OpenACC_DataExitOpNoVarPtr<string mnemonic, string clause> :
     OpenACC_DataExitOp<mnemonic, clause, "",
       [MemoryEffects<[MemRead<OpenACC_RuntimeCounters>,
                     MemWrite<OpenACC_RuntimeCounters>]>],
-      (ins Arg<OpenACC_PointerLikeTypeInterface,"Address of device variable",[MemRead]>:$accPtr)> {
+      (ins Arg<OpenACC_PointerLikeTypeInterface,"Accelerator mapped variable",
+           [MemRead]>:$accVar)> {
   let assemblyFormat = [{
-    `accPtr` `(` $accPtr `:` type($accPtr) `)`
+    custom<AccVar>($accVar, type($accVar))
     (`bounds` `(` $bounds^ `)` )?
     (`async` `(` custom<DeviceTypeOperands>($asyncOperands,
             type($asyncOperands), $asyncOperandsDeviceType)^ `)`)?
@@ -866,31 +922,35 @@ class OpenACC_DataExitOpNoVarPtr<string mnemonic, string clause> :
   }];
 
   let builders = [
-    OpBuilder<(ins "::mlir::Value":$accPtr,
-      "bool":$structured,
-      "bool":$implicit,
-      CArg<"::mlir::ValueRange", "{}">:$bounds), [{
+    OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$accPtr,
+                   "bool":$structured, "bool":$implicit,
+                   CArg<"::mlir::ValueRange", "{}">:$bounds),
+      [{
         build($_builder, $_state, accPtr,
           bounds, /*asyncOperands=*/{}, /*asyncOperandsDeviceType=*/nullptr,
           /*asyncOnly=*/nullptr, /*dataClause=*/nullptr,
           /*structured=*/$_builder.getBoolAttr(structured),
           /*implicit=*/$_builder.getBoolAttr(implicit), /*name=*/nullptr);
-      }]
-    >,
-    OpBuilder<(ins "::mlir::Value":$accPtr,
-      "bool":$structured,
-      "bool":$implicit,
-      "const ::llvm::Twine &":$name,
-      CArg<"::mlir::ValueRange", "{}">:$bounds), [{
+      }]>,
+    OpBuilder<(ins "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$accPtr,
+                   "bool":$structured, "bool":$implicit,
+                   "const ::llvm::Twine &":$name,
+                   CArg<"::mlir::ValueRange", "{}">:$bounds),
+      [{
         build($_builder, $_state, accPtr,
           bounds, /*asyncOperands=*/{}, /*asyncOperandsDeviceType=*/nullptr,
           /*asyncOnly=*/nullptr, /*dataClause=*/nullptr,
           /*structured=*/$_builder.getBoolAttr(structured),
           /*implicit=*/$_builder.getBoolAttr(implicit),
           /*name=*/$_builder.getStringAttr(name));
-      }]
-    >
+      }]>
   ];
+
+  code extraClassDeclarationDataExit = [{
+    mlir::TypedValue<mlir::acc::PointerLikeType> getAccPtr() {
+      return mlir::dyn_cast<mlir::TypedValue<mlir::acc::PointerLikeType>>(getAccVar());
+    }
+  }];
 }
 
 //===----------------------------------------------------------------------===//
@@ -900,7 +960,7 @@ def OpenACC_CopyoutOp : OpenACC_DataExitOpWithVarPtr<"copyout",
     "mlir::acc::DataClause::acc_copyout"> {
   let summary = "Represents acc copyout semantics - reverse of copyin.";
 
-  let extraClassDeclaration = extraClassDeclarationBase # [{
+  let extraClassDeclaration = extraClassDeclarationBase # extraClassDeclarationDataExit # [{
     /// Check if this is a copyout with zero modifier.
     bool isCopyoutZero();
   }];
@@ -912,7 +972,7 @@ def OpenACC_CopyoutOp : OpenACC_DataExitOpWithVarPtr<"copyout",
 def OpenACC_DeleteOp : OpenACC_DataExitOpNoVarPtr<"delete",
     "mlir::acc::DataClause::acc_delete"> {
   let summary = "Represents acc delete semantics - reverse of create.";
-  let extraClassDeclaration = extraClassDeclarationBase;
+  let extraClassDeclaration = extraClassDeclarationBase # extraClassDeclarationDataExit;
 }
 
 //===----------------------------------------------------------------------===//
@@ -921,7 +981,7 @@ def OpenACC_DeleteOp : OpenACC_DataExitOpNoVarPtr<"delete",
 def OpenACC_DetachOp : OpenACC_DataExitOpNoVarPtr<"detach",
     "mlir::acc::DataClause::acc_detach"> {
   let summary = "Represents acc detach semantics - reverse of attach.";
-  let extraClassDeclaration = extraClassDeclarationBase;
+  let extraClassDeclaration = extraClassDeclarationBase # extraClassDeclarationDataExit;
 }
 
 //===----------------------------------------------------------------------===//
@@ -930,7 +990,7 @@ def OpenACC_DetachOp : OpenACC_DataExitOpNoVarPtr<"detach",
 def OpenACC_UpdateHostOp : OpenACC_DataExitOpWithVarPtr<"update_host",
     "mlir::acc::DataClause::acc_update_host"> {
   let summary = "Represents acc update host semantics.";
-  let extraClassDeclaration = extraClassDeclarationBase # [{
+  let extraClassDeclaration = extraClassDeclarationBase # extraClassDeclarationDataExit # [{
     /// Check if this is an acc update self.
     bool isSelf() {
       return getDataClause() == acc::DataClause::acc_update_self;
@@ -1193,11 +1253,11 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel",
       UnitAttr:$selfAttr,
       Variadic<AnyType>:$reductionOperands,
       OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$privateOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$privateOperands,
       OptionalAttr<SymbolRefArrayAttr>:$privatizations,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$firstprivateOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$firstprivateOperands,
       OptionalAttr<SymbolRefArrayAttr>:$firstprivatizations,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands,
       OptionalAttr<DefaultValueAttr>:$defaultAttr,
       UnitAttr:$combined);
 
@@ -1353,11 +1413,11 @@ def OpenACC_SerialOp : OpenACC_Op<"serial",
       UnitAttr:$selfAttr,
       Variadic<AnyType>:$reductionOperands,
       OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$privateOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$privateOperands,
       OptionalAttr<SymbolRefArrayAttr>:$privatizations,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$firstprivateOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$firstprivateOperands,
       OptionalAttr<SymbolRefArrayAttr>:$firstprivatizations,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands,
       OptionalAttr<DefaultValueAttr>:$defaultAttr,
       UnitAttr:$combined);
 
@@ -1482,7 +1542,7 @@ def OpenACC_KernelsOp : OpenACC_Op<"kernels",
       Optional<I1>:$ifCond,
       Optional<I1>:$selfCond,
       UnitAttr:$selfAttr,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands,
       OptionalAttr<DefaultValueAttr>:$defaultAttr,
       UnitAttr:$combined);
 
@@ -1614,7 +1674,7 @@ def OpenACC_DataOp : OpenACC_Op<"data",
       OptionalAttr<DeviceTypeArrayAttr>:$waitOperandsDeviceType,
       OptionalAttr<BoolArrayAttr>:$hasWaitDevnum,
       OptionalAttr<DeviceTypeArrayAttr>:$waitOnly,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands,
       OptionalAttr<DefaultValueAttr>:$defaultAttr);
 
   let regions = (region AnyRegion:$region);
@@ -1709,7 +1769,7 @@ def OpenACC_EnterDataOp : OpenACC_Op<"enter_data",
                        Optional<IntOrIndex>:$waitDevnum,
                        Variadic<IntOrIndex>:$waitOperands,
                        UnitAttr:$wait,
-                       Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands);
+                       Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands);
 
   let extraClassDeclaration = [{
     /// The number of data operands.
@@ -1760,7 +1820,7 @@ def OpenACC_ExitDataOp : OpenACC_Op<"exit_data",
                        Optional<IntOrIndex>:$waitDevnum,
                        Variadic<IntOrIndex>:$waitOperands,
                        UnitAttr:$wait,
-                       Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands,
+                       Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands,
                        UnitAttr:$finalize);
 
   let extraClassDeclaration = [{
@@ -1810,7 +1870,7 @@ def OpenACC_HostDataOp : OpenACC_Op<"host_data",
   }];
 
   let arguments = (ins Optional<I1>:$ifCond,
-                       Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands,
+                       Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands,
                        UnitAttr:$ifPresent);
 
   let regions = (region AnyRegion:$region);
@@ -1887,8 +1947,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
       Variadic<IntOrIndex>:$tileOperands,
       OptionalAttr<DenseI32ArrayAttr>:$tileOperandsSegments,
       OptionalAttr<DeviceTypeArrayAttr>:$tileOperandsDeviceType,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$cacheOperands,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$privateOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$cacheOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$privateOperands,
       OptionalAttr<SymbolRefArrayAttr>:$privatizations,
       Variadic<AnyType>:$reductionOperands,
       OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes,
@@ -2201,7 +2261,7 @@ def OpenACC_DeclareEnterOp : OpenACC_Op<"declare_enter",
     ```
   }];
 
-  let arguments = (ins Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands);
+  let arguments = (ins Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands);
   let results = (outs OpenACC_DeclareTokenType:$token);
 
   let assemblyFormat = [{
@@ -2236,7 +2296,7 @@ def OpenACC_DeclareExitOp : OpenACC_Op<"declare_exit",
 
   let arguments = (ins
       Optional<OpenACC_DeclareTokenType>:$token,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands);
+      Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands);
 
   let assemblyFormat = [{
     oilist(
@@ -2338,7 +2398,7 @@ def OpenACC_DeclareOp : OpenACC_Op<"declare",
   }];
 
   let arguments = (ins
-      Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands);
+      Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands);
 
   let regions = (region AnyRegion:$region);
 
@@ -2589,7 +2649,7 @@ def OpenACC_UpdateOp : OpenACC_Op<"update",
       OptionalAttr<DeviceTypeArrayAttr>:$waitOperandsDeviceType,
       OptionalAttr<BoolArrayAttr>:$hasWaitDevnum,
       OptionalAttr<DeviceTypeArrayAttr>:$waitOnly,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$dataClauseOperands,
+      Variadic<OpenACC_AnyPointerOrMappableType>:$dataClauseOperands,
       UnitAttr:$ifPresent);
 
   let extraClassDeclaration = [{
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td
index 0a3edd5637704..bec46be89f058 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td
@@ -31,4 +31,97 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> {
   ];
 }
 
+def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> {
+  let cppNamespace = "::mlir::acc";
+
+  let description = [{
+    An interface to capture type-based semantics for mapping in a manner that
+    makes it convertible to size-based semantics.
+  }];
+
+  let methods = [
+    InterfaceMethod<
+      /*description=*/[{
+        Returns the pointer to the `var` if recoverable (such as in cases
+        where the current operation is a load from a memory slot).
+      }],
+      /*retTy=*/"::mlir::TypedValue<::mlir::acc::PointerLikeType>",
+      /*methodName=*/"getVarPtr",
+      /*args=*/(ins "::mlir::Value":$var),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        if (auto ptr = mlir::dyn_cast<mlir::TypedValue<mlir::acc::PointerLikeType>>(
+              var))
+          return ptr;
+        return {};
+      }]
+    >,
+    InterfaceMethod<
+      /*description=*/[{
+        Returns the size in bytes when computable. If this is an array-like
+        type, avoiding passing `accBounds` ensures a computation of the size
+        of whole type.
+      }],
+      /*retTy=*/"::std::optional<::llvm::TypeSize>",
+      /*methodName=*/"getSizeInBytes",
+      /*args=*/(ins "::mlir::Value":$var,
+                    "::mlir::ValueRange":$accBounds,
+                    "const ::mlir::DataLayout &":$dataLayout),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        // Bounds operations are typically created for array types. In the
+        // generic implementation, it is not straightforward to distinguish
+        // between array types and ensure the size and offset take into account
+        // just the slice requested. Thus return not-computable for now.
+        if (!accBounds.empty())
+          return {};
+        return {dataLayout.getTypeSize($_type)};
+      }]
+    >,
+    InterfaceMethod<
+      /*description=*/[{
+        Returns the offset in bytes when computable.
+      }],
+      /*retTy=*/"::std::optional<::int64_t>",
+      /*methodName=*/"getOffsetInBytes",
+      /*args=*/(ins "::mlir::Value":$var,
+                    "::mlir::ValueRange":$accBounds,
+                    "const ::mlir::DataLayout &":$dataLayout),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        // Bounds operations are typically created for array types. In the
+        // generic implementation, it is not straightforward to distinguish
+        // between array types and ensure the size and offset take into account
+        // just the slice requested. Thus return not-computable for now.
+        if (!accBounds.empty())
+          return {};
+
+        // If the type size is computable, it means it is trivial. Assume
+        // offset of 0.
+        if (::mlir::cast<::mlir::acc::MappableType>($_type).getSizeInBytes(
+              var, accBounds, dataLayout).has_value()) {
+          return {0};
+        }
+
+        return {};
+      }]
+    >,
+      InterfaceMethod<
+      /*description=*/[{
+        Returns explicit `acc.bounds` operations that envelop the whole
+        data structure. These operations are inserted using the provided builder
+        at the location set before calling this API.
+      }],
+      /*retTy=*/"::llvm::SmallVector<::mlir::Value>",
+      /*methodName=*/"generateAccBounds",
+      /*args=*/(ins "::mlir::Value":$var,
+                    "::mlir::OpBuilder &":$builder),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        return {};
+      }]
+    >,
+  ];
+}
+
 #endif // OPENACC_TYPE_INTERFACES
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index d490376db0e7f..735dd4fd7b8bb 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -16,6 +16,7 @@
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/OpImplementation.h"
+#include "mlir/Support/LLVM.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/TypeSwitch.h"
@@ -192,6 +193,97 @@ static LogicalResult checkWaitAndAsyncConflict(Op op) {
   return success();
 }
 
+template <typename Op>
+static LogicalResult checkVarAndVarType(Op op) {
+  if (!op.getVar())
+    return op.emitError("must have var operand");
+
+  if (mlir::isa<mlir::acc::PointerLikeType>(op.getVar().getType()) &&
+      mlir::isa<mlir::acc::MappableType>(op.getVar().getType())) {
+    // TODO: If a type implements both interfaces (mappable and pointer-like),
+    // it is unclear which semantics to apply without additional info which
+    // would need captured in the data operation. For now restrict this case
+    // unless a compelling reason to support disambiguating between the two.
+    return op.emitError("var must be mappable or pointer-like (not both)");
+  }
+
+  if (!mlir::isa<mlir::acc::PointerLikeType>(op.getVar().getType()) &&
+      !mlir::isa<mlir::acc::MappableType>(op.getVar().getType()))
+    return op.emitError("var must be mappable or pointer-like");
+
+  if (mlir::isa<mlir::acc::MappableType>(op.getVar().getType()) &&
+      op.getVarType() != op.getVar().getType())
+    return op.emitError("varType must match when var is mappable");
+
+  return success();
+}
+
+template <typename Op>
+static LogicalResult checkVarAndAccVar(Op op) {
+  if (op.getVar().getType() != op.getAccVar().getType())
+    return op.emitError("input and output types must match");
+
+  return success();
+}
+
+static ParseResult parseVar(mlir::OpAsmParser &parser,
+                            OpAsmParser::UnresolvedOperand &var) {
+  // Either `var` or `varPtr` keyword is required.
+  if (failed(parser.parseOptionalKeyword("varPtr"))) {
+    if (failed(parser.parseKeyword("var")))
+      return failure();
+  }
+  if (failed(parser.parseLParen()))
+    return failure();
+  if (failed(parser.parseOperand(var)))
+    return failure();
+
+  return success();
+}
+
+static void printVar(mlir::OpAsmPrinter &p, mlir::Operation *op,
+                     mlir::Value var) {
+  if (mlir::isa<mlir::acc::PointerLikeType>(var.getType()))
+    p << "varPtr(";
+  else
+    p << "var(";
+  p.printOperand(var);
+}
+
+static ParseResult parseAccVar(mlir::OpAsmParser &parser,
+                               OpAsmParser::UnresolvedOperand &var,
+                               mlir::Type &accVarType) {
+  // Either `accVar` or `accPtr` keyword is required.
+  if (failed(parser.parseOptionalKeyword("accPtr"))) {
+    if (failed(parser.parseKeyword("accVar")))
+      return failure();
+  }
+  if (failed(parser.parseLParen()))
+    return failure();
+  if (failed(parser.parseOperand(var)))
+    return failure();
+  if (failed(parser.parseColon()))
+    return failure();
+  if (failed(parser.parseType(accVarType)))
+    return failure();
+  if (failed(parser.parseRParen()))
+    return failure();
+
+  return success();
+}
+
+static void printAccVar(mlir::OpAsmPrinter &p, mlir::Operation *op,
+                        mlir::Value accVar, mlir::Type accVarType) {
+  if (mlir::isa<mlir::acc::PointerLikeType>(accVar.getType()))
+    p << "accPtr(";
+  else
+    p << "accVar(";
+  p.printOperand(accVar);
+  p << " : ";
+  p.printType(accVarType);
+  p << ")";
+}
+
 static ParseResult parseVarPtrType(mlir::OpAsmParser &parser,
                                    mlir::Type &varPtrType,
                                    mlir::TypeAttr &varTypeAttr) {
@@ -211,8 +303,11 @@ static ParseResult parseVarPtrType(mlir::OpAsmParser &parser,
       return failure();
   } else {
     // Set `varType` from the element type of the type of `varPtr`.
-    varTypeAttr = mlir::TypeAttr::get(
-        mlir::cast<mlir::acc::PointerLikeType>(varPtrType).getElementType());
+    if (mlir::isa<mlir::acc::PointerLikeType>(varPtrType))
+      varTypeAttr = mlir::TypeAttr::get(
+          mlir::cast<mlir::acc::PointerLikeType>(varPtrType).getElementType());
+    else
+      varTypeAttr = mlir::TypeAttr::get(varPtrType);
   }
 
   return success();
@@ -226,8 +321,11 @@ static void printVarPtrType(mlir::OpAsmPrinter &p, mlir::Operation *op,
   // Print the `varType` only if it differs from the element type of
   // `varPtr`'s type.
   mlir::Type varType = varTypeAttr.getValue();
-  if (mlir::cast<mlir::acc::PointerLikeType>(varPtrType).getElementType() !=
-      varType) {
+  mlir::Type typeToCheckAgainst =
+      mlir::isa<mlir::acc::PointerLikeType>(varPtrType)
+          ? mlir::cast<mlir::acc::PointerLikeType>(varPtrType).getElementType()
+          : varPtrType;
+  if (typeToCheckAgainst != varType) {
     p << " varType(";
     p.printType(varType);
     p << ")";
@@ -252,6 +350,8 @@ LogicalResult acc::PrivateOp::verify() {
   if (getDataClause() != acc::DataClause::acc_private)
     return emitError(
         "data clause associated with private operation must match its intent");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
   return success();
 }
 
@@ -262,6 +362,8 @@ LogicalResult acc::FirstprivateOp::verify() {
   if (getDataClause() != acc::DataClause::acc_firstprivate)
     return emitError("data clause associated with firstprivate operation must "
                      "match its intent");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
   return success();
 }
 
@@ -272,6 +374,8 @@ LogicalResult acc::ReductionOp::verify() {
   if (getDataClause() != acc::DataClause::acc_reduction)
     return emitError("data clause associated with reduction operation must "
                      "match its intent");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
   return success();
 }
 
@@ -282,6 +386,10 @@ LogicalResult acc::DevicePtrOp::verify() {
   if (getDataClause() != acc::DataClause::acc_deviceptr)
     return emitError("data clause associated with deviceptr operation must "
                      "match its intent");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -292,6 +400,10 @@ LogicalResult acc::PresentOp::verify() {
   if (getDataClause() != acc::DataClause::acc_present)
     return emitError(
         "data clause associated with present operation must match its intent");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -307,6 +419,10 @@ LogicalResult acc::CopyinOp::verify() {
     return emitError(
         "data clause associated with copyin operation must match its intent"
         " or specify original clause this operation was decomposed from");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -326,6 +442,10 @@ LogicalResult acc::CreateOp::verify() {
     return emitError(
         "data clause associated with create operation must match its intent"
         " or specify original clause this operation was decomposed from");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -342,6 +462,10 @@ LogicalResult acc::NoCreateOp::verify() {
   if (getDataClause() != acc::DataClause::acc_no_create)
     return emitError("data clause associated with no_create operation must "
                      "match its intent");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -352,6 +476,10 @@ LogicalResult acc::AttachOp::verify() {
   if (getDataClause() != acc::DataClause::acc_attach)
     return emitError(
         "data clause associated with attach operation must match its intent");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -363,6 +491,10 @@ LogicalResult acc::DeclareDeviceResidentOp::verify() {
   if (getDataClause() != acc::DataClause::acc_declare_device_resident)
     return emitError("data clause associated with device_resident operation "
                      "must match its intent");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -374,6 +506,10 @@ LogicalResult acc::DeclareLinkOp::verify() {
   if (getDataClause() != acc::DataClause::acc_declare_link)
     return emitError(
         "data clause associated with link operation must match its intent");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -389,8 +525,12 @@ LogicalResult acc::CopyoutOp::verify() {
     return emitError(
         "data clause associated with copyout operation must match its intent"
         " or specify original clause this operation was decomposed from");
-  if (!getVarPtr() || !getAccPtr())
+  if (!getVar() || !getAccVar())
     return emitError("must have both host and device pointers");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -414,7 +554,7 @@ LogicalResult acc::DeleteOp::verify() {
     return emitError(
         "data clause associated with delete operation must match its intent"
         " or specify original clause this operation was decomposed from");
-  if (!getAccPtr())
+  if (!getAccVar())
     return emitError("must have device pointer");
   return success();
 }
@@ -429,7 +569,7 @@ LogicalResult acc::DetachOp::verify() {
     return emitError(
         "data clause associated with detach operation must match its intent"
         " or specify original clause this operation was decomposed from");
-  if (!getAccPtr())
+  if (!getAccVar())
     return emitError("must have device pointer");
   return success();
 }
@@ -444,8 +584,12 @@ LogicalResult acc::UpdateHostOp::verify() {
     return emitError(
         "data clause associated with host operation must match its intent"
         " or specify original clause this operation was decomposed from");
-  if (!getVarPtr() || !getAccPtr())
+  if (!getVar() || !getAccVar())
     return emitError("must have both host and device pointers");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -458,6 +602,10 @@ LogicalResult acc::UpdateDeviceOp::verify() {
     return emitError(
         "data clause associated with device operation must match its intent"
         " or specify original clause this operation was decomposed from");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -470,6 +618,10 @@ LogicalResult acc::UseDeviceOp::verify() {
     return emitError(
         "data clause associated with use_device operation must match its intent"
         " or specify original clause this operation was decomposed from");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -483,6 +635,10 @@ LogicalResult acc::CacheOp::verify() {
     return emitError(
         "data clause associated with cache operation must match its intent"
         " or specify original clause this operation was decomposed from");
+  if (failed(checkVarAndVarType(*this)))
+    return failure();
+  if (failed(checkVarAndAccVar(*this)))
+    return failure();
   return success();
 }
 
@@ -502,7 +658,7 @@ static ParseResult parseRegions(OpAsmParser &parser, OperationState &state,
 }
 
 static bool isComputeOperation(Operation *op) {
-  return isa<acc::ParallelOp, acc::LoopOp>(op);
+  return isa<ACC_COMPUTE_CONSTRUCT_AND_LOOP_OPS>(op);
 }
 
 namespace {
@@ -2917,20 +3073,56 @@ LogicalResult acc::WaitOp::verify() {
 // acc dialect utilities
 //===----------------------------------------------------------------------===//
 
-mlir::Value mlir::acc::getVarPtr(mlir::Operation *accDataClauseOp) {
-  auto varPtr{llvm::TypeSwitch<mlir::Operation *, mlir::Value>(accDataClauseOp)
+mlir::TypedValue<mlir::acc::PointerLikeType>
+mlir::acc::getVarPtr(mlir::Operation *accDataClauseOp) {
+  auto varPtr{llvm::TypeSwitch<mlir::Operation *,
+                               mlir::TypedValue<mlir::acc::PointerLikeType>>(
+                  accDataClauseOp)
                   .Case<ACC_DATA_ENTRY_OPS>(
                       [&](auto entry) { return entry.getVarPtr(); })
                   .Case<mlir::acc::CopyoutOp, mlir::acc::UpdateHostOp>(
                       [&](auto exit) { return exit.getVarPtr(); })
-                  .Default([&](mlir::Operation *) { return mlir::Value(); })};
+                  .Default([&](mlir::Operation *) {
+                    return mlir::TypedValue<mlir::acc::PointerLikeType>();
+                  })};
   return varPtr;
 }
 
-mlir::Value mlir::acc::getAccPtr(mlir::Operation *accDataClauseOp) {
-  auto accPtr{llvm::TypeSwitch<mlir::Operation *, mlir::Value>(accDataClauseOp)
+mlir::Value mlir::acc::getVar(mlir::Operation *accDataClauseOp) {
+  auto varPtr{
+      llvm::TypeSwitch<mlir::Operation *, mlir::Value>(accDataClauseOp)
+          .Case<ACC_DATA_ENTRY_OPS>([&](auto entry) { return entry.getVar(); })
+          .Default([&](mlir::Operation *) { return mlir::Value(); })};
+  return varPtr;
+}
+
+mlir::Type mlir::acc::getVarType(mlir::Operation *accDataClauseOp) {
+  auto varType{llvm::TypeSwitch<mlir::Operation *, mlir::Type>(accDataClauseOp)
+                   .Case<ACC_DATA_ENTRY_OPS>(
+                       [&](auto entry) { return entry.getVarType(); })
+                   .Case<mlir::acc::CopyoutOp, mlir::acc::UpdateHostOp>(
+                       [&](auto exit) { return exit.getVarType(); })
+                   .Default([&](mlir::Operation *) { return mlir::Type(); })};
+  return varType;
+}
+
+mlir::TypedValue<mlir::acc::PointerLikeType>
+mlir::acc::getAccPtr(mlir::Operation *accDataClauseOp) {
+  auto accPtr{llvm::TypeSwitch<mlir::Operation *,
+                               mlir::TypedValue<mlir::acc::PointerLikeType>>(
+                  accDataClauseOp)
                   .Case<ACC_DATA_ENTRY_OPS, ACC_DATA_EXIT_OPS>(
                       [&](auto dataClause) { return dataClause.getAccPtr(); })
+                  .Default([&](mlir::Operation *) {
+                    return mlir::TypedValue<mlir::acc::PointerLikeType>();
+                  })};
+  return accPtr;
+}
+
+mlir::Value mlir::acc::getAccVar(mlir::Operation *accDataClauseOp) {
+  auto accPtr{llvm::TypeSwitch<mlir::Operation *, mlir::Value>(accDataClauseOp)
+                  .Case<ACC_DATA_ENTRY_OPS, ACC_DATA_EXIT_OPS>(
+                      [&](auto dataClause) { return dataClause.getAccVar(); })
                   .Default([&](mlir::Operation *) { return mlir::Value(); })};
   return accPtr;
 }
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp
index fbdada9309d32..cfb8aa767b6f8 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp
@@ -9,9 +9,11 @@
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Diagnostics.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/IR/OwningOpRef.h"
+#include "mlir/IR/Value.h"
 #include "gtest/gtest.h"
 
 using namespace mlir;
@@ -446,10 +448,12 @@ void testShortDataEntryOpBuilders(OpBuilder &b, MLIRContext &context,
   OwningOpRef<memref::AllocaOp> varPtrOp =
       b.create<memref::AllocaOp>(loc, memrefTy);
 
-  OwningOpRef<Op> op = b.create<Op>(loc, varPtrOp->getResult(),
+  TypedValue<PointerLikeType> varPtr =
+      cast<TypedValue<PointerLikeType>>(varPtrOp->getResult());
+  OwningOpRef<Op> op = b.create<Op>(loc, varPtr,
                                     /*structured=*/true, /*implicit=*/true);
 
-  EXPECT_EQ(op->getVarPtr(), varPtrOp->getResult());
+  EXPECT_EQ(op->getVarPtr(), varPtr);
   EXPECT_EQ(op->getType(), memrefTy);
   EXPECT_EQ(op->getDataClause(), dataClause);
   EXPECT_TRUE(op->getImplicit());
@@ -457,7 +461,7 @@ void testShortDataEntryOpBuilders(OpBuilder &b, MLIRContext &context,
   EXPECT_TRUE(op->getBounds().empty());
   EXPECT_FALSE(op->getVarPtrPtr());
 
-  OwningOpRef<Op> op2 = b.create<Op>(loc, varPtrOp->getResult(),
+  OwningOpRef<Op> op2 = b.create<Op>(loc, varPtr,
                                      /*structured=*/false, /*implicit=*/false);
   EXPECT_FALSE(op2->getImplicit());
   EXPECT_FALSE(op2->getStructured());
@@ -467,13 +471,13 @@ void testShortDataEntryOpBuilders(OpBuilder &b, MLIRContext &context,
   OwningOpRef<DataBoundsOp> bounds =
       b.create<DataBoundsOp>(loc, extent->getResult());
   OwningOpRef<Op> opWithBounds =
-      b.create<Op>(loc, varPtrOp->getResult(),
+      b.create<Op>(loc, varPtr,
                    /*structured=*/true, /*implicit=*/true, bounds->getResult());
   EXPECT_FALSE(opWithBounds->getBounds().empty());
   EXPECT_EQ(opWithBounds->getBounds().back(), bounds->getResult());
 
   OwningOpRef<Op> opWithName =
-      b.create<Op>(loc, varPtrOp->getResult(),
+      b.create<Op>(loc, varPtr,
                    /*structured=*/true, /*implicit=*/true, "varName");
   EXPECT_EQ(opWithName->getNameAttr().str(), "varName");
 }
@@ -516,23 +520,26 @@ void testShortDataExitOpBuilders(OpBuilder &b, MLIRContext &context,
   auto memrefTy = MemRefType::get({}, b.getI32Type());
   OwningOpRef<memref::AllocaOp> varPtrOp =
       b.create<memref::AllocaOp>(loc, memrefTy);
+  TypedValue<PointerLikeType> varPtr =
+      cast<TypedValue<PointerLikeType>>(varPtrOp->getResult());
+
   OwningOpRef<GetDevicePtrOp> accPtrOp = b.create<GetDevicePtrOp>(
-      loc, varPtrOp->getResult(), /*structured=*/true, /*implicit=*/true);
+      loc, varPtr, /*structured=*/true, /*implicit=*/true);
+  TypedValue<PointerLikeType> accPtr =
+      cast<TypedValue<PointerLikeType>>(accPtrOp->getResult());
 
-  OwningOpRef<Op> op =
-      b.create<Op>(loc, accPtrOp->getResult(), varPtrOp->getResult(),
-                   /*structured=*/true, /*implicit=*/true);
+  OwningOpRef<Op> op = b.create<Op>(loc, accPtr, varPtr,
+                                    /*structured=*/true, /*implicit=*/true);
 
-  EXPECT_EQ(op->getVarPtr(), varPtrOp->getResult());
-  EXPECT_EQ(op->getAccPtr(), accPtrOp->getResult());
+  EXPECT_EQ(op->getVarPtr(), varPtr);
+  EXPECT_EQ(op->getAccPtr(), accPtr);
   EXPECT_EQ(op->getDataClause(), dataClause);
   EXPECT_TRUE(op->getImplicit());
   EXPECT_TRUE(op->getStructured());
   EXPECT_TRUE(op->getBounds().empty());
 
-  OwningOpRef<Op> op2 =
-      b.create<Op>(loc, accPtrOp->getResult(), varPtrOp->getResult(),
-                   /*structured=*/false, /*implicit=*/false);
+  OwningOpRef<Op> op2 = b.create<Op>(loc, accPtr, varPtr,
+                                     /*structured=*/false, /*implicit=*/false);
   EXPECT_FALSE(op2->getImplicit());
   EXPECT_FALSE(op2->getStructured());
 
@@ -541,13 +548,13 @@ void testShortDataExitOpBuilders(OpBuilder &b, MLIRContext &context,
   OwningOpRef<DataBoundsOp> bounds =
       b.create<DataBoundsOp>(loc, extent->getResult());
   OwningOpRef<Op> opWithBounds =
-      b.create<Op>(loc, accPtrOp->getResult(), varPtrOp->getResult(),
+      b.create<Op>(loc, accPtr, varPtr,
                    /*structured=*/true, /*implicit=*/true, bounds->getResult());
   EXPECT_FALSE(opWithBounds->getBounds().empty());
   EXPECT_EQ(opWithBounds->getBounds().back(), bounds->getResult());
 
   OwningOpRef<Op> opWithName =
-      b.create<Op>(loc, accPtrOp->getResult(), varPtrOp->getResult(),
+      b.create<Op>(loc, accPtr, varPtr,
                    /*structured=*/true, /*implicit=*/true, "varName");
   EXPECT_EQ(opWithName->getNameAttr().str(), "varName");
 }
@@ -565,19 +572,24 @@ void testShortDataExitNoVarPtrOpBuilders(OpBuilder &b, MLIRContext &context,
   auto memrefTy = MemRefType::get({}, b.getI32Type());
   OwningOpRef<memref::AllocaOp> varPtrOp =
       b.create<memref::AllocaOp>(loc, memrefTy);
+  TypedValue<PointerLikeType> varPtr =
+      cast<TypedValue<PointerLikeType>>(varPtrOp->getResult());
+
   OwningOpRef<GetDevicePtrOp> accPtrOp = b.create<GetDevicePtrOp>(
-      loc, varPtrOp->getResult(), /*structured=*/true, /*implicit=*/true);
+      loc, varPtr, /*structured=*/true, /*implicit=*/true);
+  TypedValue<PointerLikeType> accPtr =
+      cast<TypedValue<PointerLikeType>>(accPtrOp->getResult());
 
-  OwningOpRef<Op> op = b.create<Op>(loc, accPtrOp->getResult(),
+  OwningOpRef<Op> op = b.create<Op>(loc, accPtr,
                                     /*structured=*/true, /*implicit=*/true);
 
-  EXPECT_EQ(op->getAccPtr(), accPtrOp->getResult());
+  EXPECT_EQ(op->getAccPtr(), accPtr);
   EXPECT_EQ(op->getDataClause(), dataClause);
   EXPECT_TRUE(op->getImplicit());
   EXPECT_TRUE(op->getStructured());
   EXPECT_TRUE(op->getBounds().empty());
 
-  OwningOpRef<Op> op2 = b.create<Op>(loc, accPtrOp->getResult(),
+  OwningOpRef<Op> op2 = b.create<Op>(loc, accPtr,
                                      /*structured=*/false, /*implicit=*/false);
   EXPECT_FALSE(op2->getImplicit());
   EXPECT_FALSE(op2->getStructured());
@@ -587,13 +599,13 @@ void testShortDataExitNoVarPtrOpBuilders(OpBuilder &b, MLIRContext &context,
   OwningOpRef<DataBoundsOp> bounds =
       b.create<DataBoundsOp>(loc, extent->getResult());
   OwningOpRef<Op> opWithBounds =
-      b.create<Op>(loc, accPtrOp->getResult(),
+      b.create<Op>(loc, accPtr,
                    /*structured=*/true, /*implicit=*/true, bounds->getResult());
   EXPECT_FALSE(opWithBounds->getBounds().empty());
   EXPECT_EQ(opWithBounds->getBounds().back(), bounds->getResult());
 
   OwningOpRef<Op> opWithName =
-      b.create<Op>(loc, accPtrOp->getResult(),
+      b.create<Op>(loc, accPtr,
                    /*structured=*/true, /*implicit=*/true, "varName");
   EXPECT_EQ(opWithName->getNameAttr().str(), "varName");
 }
@@ -604,3 +616,75 @@ TEST_F(OpenACCOpsTest, shortDataExitOpNoVarPtrBuilder) {
   testShortDataExitNoVarPtrOpBuilders<DetachOp>(b, context, loc,
                                                 DataClause::acc_detach);
 }
+
+template <typename Op>
+void testShortDataEntryOpBuildersMappableVar(OpBuilder &b, MLIRContext &context,
+                                             Location loc,
+                                             DataClause dataClause) {
+  auto int64Ty = b.getI64Type();
+  auto memrefTy = MemRefType::get({}, int64Ty);
+  OwningOpRef<memref::AllocaOp> varPtrOp =
+      b.create<memref::AllocaOp>(loc, memrefTy);
+  SmallVector<Value> indices;
+  OwningOpRef<memref::LoadOp> loadVarOp =
+      b.create<memref::LoadOp>(loc, int64Ty, varPtrOp->getResult(), indices);
+
+  EXPECT_TRUE(isMappableType(loadVarOp->getResult().getType()));
+  TypedValue<MappableType> var =
+      cast<TypedValue<MappableType>>(loadVarOp->getResult());
+  OwningOpRef<Op> op = b.create<Op>(loc, var,
+                                    /*structured=*/true, /*implicit=*/true);
+
+  EXPECT_EQ(op->getVar(), var);
+  EXPECT_EQ(op->getVarPtr(), nullptr);
+  EXPECT_EQ(op->getType(), int64Ty);
+  EXPECT_EQ(op->getVarType(), int64Ty);
+  EXPECT_EQ(op->getDataClause(), dataClause);
+  EXPECT_TRUE(op->getImplicit());
+  EXPECT_TRUE(op->getStructured());
+  EXPECT_TRUE(op->getBounds().empty());
+  EXPECT_FALSE(op->getVarPtrPtr());
+}
+
+struct IntegerOpenACCMappableModel
+    : public mlir::acc::MappableType::ExternalModel<IntegerOpenACCMappableModel,
+                                                    IntegerType> {};
+
+TEST_F(OpenACCOpsTest, mappableTypeBuilderDataEntry) {
+  // First, set up the test by attaching MappableInterface to IntegerType.
+  IntegerType i64ty = IntegerType::get(&context, 8);
+  ASSERT_FALSE(isMappableType(i64ty));
+  IntegerType::attachInterface<IntegerOpenACCMappableModel>(context);
+  ASSERT_TRUE(isMappableType(i64ty));
+
+  testShortDataEntryOpBuildersMappableVar<PrivateOp>(b, context, loc,
+                                                     DataClause::acc_private);
+  testShortDataEntryOpBuildersMappableVar<FirstprivateOp>(
+      b, context, loc, DataClause::acc_firstprivate);
+  testShortDataEntryOpBuildersMappableVar<ReductionOp>(
+      b, context, loc, DataClause::acc_reduction);
+  testShortDataEntryOpBuildersMappableVar<DevicePtrOp>(
+      b, context, loc, DataClause::acc_deviceptr);
+  testShortDataEntryOpBuildersMappableVar<PresentOp>(b, context, loc,
+                                                     DataClause::acc_present);
+  testShortDataEntryOpBuildersMappableVar<CopyinOp>(b, context, loc,
+                                                    DataClause::acc_copyin);
+  testShortDataEntryOpBuildersMappableVar<CreateOp>(b, context, loc,
+                                                    DataClause::acc_create);
+  testShortDataEntryOpBuildersMappableVar<NoCreateOp>(
+      b, context, loc, DataClause::acc_no_create);
+  testShortDataEntryOpBuildersMappableVar<AttachOp>(b, context, loc,
+                                                    DataClause::acc_attach);
+  testShortDataEntryOpBuildersMappableVar<GetDevicePtrOp>(
+      b, context, loc, DataClause::acc_getdeviceptr);
+  testShortDataEntryOpBuildersMappableVar<UpdateDeviceOp>(
+      b, context, loc, DataClause::acc_update_device);
+  testShortDataEntryOpBuildersMappableVar<UseDeviceOp>(
+      b, context, loc, DataClause::acc_use_device);
+  testShortDataEntryOpBuildersMappableVar<DeclareDeviceResidentOp>(
+      b, context, loc, DataClause::acc_declare_device_resident);
+  testShortDataEntryOpBuildersMappableVar<DeclareLinkOp>(
+      b, context, loc, DataClause::acc_declare_link);
+  testShortDataEntryOpBuildersMappableVar<CacheOp>(b, context, loc,
+                                                   DataClause::acc_cache);
+}

From d5145715f7e0866fe6526f9f09558dd698344d52 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Thu, 9 Jan 2025 13:31:02 -0500
Subject: [PATCH 25/79] [RISCV][VLOPT] Add vfirst and vcpop to getOperandInfo
 (#122295)

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    |  4 ++
 .../test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 60 +++++++++++++++++++
 2 files changed, 64 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 9a0938bc38dd4..2c04dd062670f 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -488,6 +488,10 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VFCVT_F_X_V:
   // Vector Floating-Point Merge Instruction
   case RISCV::VFMERGE_VFM:
+  // Vector count population in mask vcpop.m
+  // vfirst find-first-set mask bit
+  case RISCV::VCPOP_M:
+  case RISCV::VFIRST_M:
     return MILog2SEW;
 
   // Vector Widening Integer Add/Subtract
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index 389a7ff1fdbc3..fe0929a6f8745 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -1438,3 +1438,63 @@ body: |
     %x:vr = nofpexcept PseudoVFCVT_X_F_V_M1 $noreg, $noreg, 0, -1, 5 /* e32 */, 0
     %y:vr = PseudoVFREDMAX_VS_MF2_E32 $noreg, %x, %x, 1, 5 /* e32 */, 0
 ...
+---
+name: vfirst_v
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vfirst_v
+    ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, 1, 0 /* e8 */
+    ; CHECK-NEXT: %y:gpr = PseudoVFIRST_M_B8 %x, 1, 0 /* e8 */
+    %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0
+    %y:gpr = PseudoVFIRST_M_B8 %x, 1, 0
+...
+---
+name: vfirst_v_incompatible_eew
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vfirst_v_incompatible_eew
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:gpr = PseudoVFIRST_M_B8 %x, 1, 0 /* e8 */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:gpr = PseudoVFIRST_M_B8 %x, 1, 0
+...
+---
+name: vfirst_v_incompatible_emul
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vfirst_v_incompatible_emul
+    ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 /* e8 */
+    ; CHECK-NEXT: %y:gpr = PseudoVFIRST_M_B16 %x, 1, 0 /* e8 */
+    %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0
+    %y:gpr = PseudoVFIRST_M_B16 %x, 1, 0
+...
+---
+name: vcpop_v
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vcpop_v
+    ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, 1, 0 /* e8 */
+    ; CHECK-NEXT: %y:gpr = PseudoVCPOP_M_B8 %x, 1, 0 /* e8 */
+    %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0
+    %y:gpr = PseudoVCPOP_M_B8 %x, 1, 0
+...
+---
+name: vcopop_v_incompatible_eew
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vcopop_v_incompatible_eew
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:gpr = PseudoVCPOP_M_B8 %x, 1, 0 /* e8 */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0
+    %y:gpr = PseudoVCPOP_M_B8 %x, 1, 0
+...
+---
+name: vcpop_v_incompaitble_emul
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vcpop_v_incompaitble_emul
+    ; CHECK: %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0 /* e8 */
+    ; CHECK-NEXT: %y:gpr = PseudoVCPOP_M_B16 %x, 1, 0 /* e8 */
+    %x:vr = PseudoVMAND_MM_B8 $noreg, $noreg, -1, 0
+    %y:gpr = PseudoVCPOP_M_B16 %x, 1, 0
+...

From 8e12037d38e2a9a1cfc6402be2b33283e3220bcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Thu, 9 Jan 2025 19:37:24 +0100
Subject: [PATCH 26/79] [flang] Fix finding system install of LLVM/Clang/MLIR
 in standalone builds (#120914)

The changes in #87822 introduced a regression where Flang could no
longer be built standalone without explicitly specifying all of
LLVM_DIR, CLANG_DIR and MLIR_DIR. Restore the earlier logic that used
these paths as hints, and supported finding system-wide LLVM install via
default paths. Instead, make paths absolute after locating the packages,
using the paths CMake determined.

-----

@vzakhari, could you confirm that this doesn't break your use case?
---
 flang/CMakeLists.txt | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 68947eaa9c9bd..34b3fedc88494 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -89,13 +89,16 @@ if (FLANG_STANDALONE_BUILD)
     mark_as_advanced(LLVM_ENABLE_ASSERTIONS)
   endif()
 
+  # We need a pre-built/installed version of LLVM.
+  find_package(LLVM REQUIRED HINTS "${LLVM_DIR}")
   # If the user specifies a relative path to LLVM_DIR, the calls to include
   # LLVM modules fail. Append the absolute path to LLVM_DIR instead.
   get_filename_component(LLVM_DIR_ABSOLUTE ${LLVM_DIR}
     REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR})
   list(APPEND CMAKE_MODULE_PATH ${LLVM_DIR_ABSOLUTE})
-  # We need a pre-built/installed version of LLVM.
-  find_package(LLVM REQUIRED HINTS "${LLVM_DIR_ABSOLUTE}")
+
+  # TODO: Remove when libclangDriver is lifted out of Clang
+  find_package(Clang REQUIRED PATHS "${CLANG_DIR}")
 
   # Users might specify a path to CLANG_DIR that's:
   #   * a full path, or
@@ -104,17 +107,11 @@ if (FLANG_STANDALONE_BUILD)
   # cases.
   get_filename_component(
     CLANG_DIR_ABSOLUTE
-    ${CLANG_DIR}
+    ${Clang_DIR}
     REALPATH
     BASE_DIR ${CMAKE_CURRENT_BINARY_DIR})
   list(APPEND CMAKE_MODULE_PATH ${CLANG_DIR_ABSOLUTE})
 
-  # TODO: Remove when libclangDriver is lifted out of Clang
-  find_package(Clang REQUIRED PATHS "${CLANG_DIR_ABSOLUTE}" NO_DEFAULT_PATH)
-  if (NOT Clang_FOUND)
-    message(FATAL_ERROR "Failed to find Clang")
-  endif()
-
   # If LLVM links to zlib we need the imported targets so we can too.
   if(LLVM_ENABLE_ZLIB)
     find_package(ZLIB REQUIRED)
@@ -132,12 +129,12 @@ if (FLANG_STANDALONE_BUILD)
   include(AddClang)
 
   include(TableGen)
+  find_package(MLIR REQUIRED CONFIG HINTS ${MLIR_DIR})
   # If the user specifies a relative path to MLIR_DIR, the calls to include
   # MLIR modules fail. Append the absolute path to MLIR_DIR instead.
   get_filename_component(MLIR_DIR_ABSOLUTE ${MLIR_DIR}
     REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR})
   list(APPEND CMAKE_MODULE_PATH ${MLIR_DIR_ABSOLUTE})
-  find_package(MLIR REQUIRED CONFIG HINTS ${MLIR_DIR_ABSOLUTE})
   # Use SYSTEM for the same reasons as for LLVM includes
   include_directories(SYSTEM ${MLIR_INCLUDE_DIRS})
   include(AddMLIR)

From 3ba46ddb7994c4fa63d3d34166b6f3dedb07a184 Mon Sep 17 00:00:00 2001
From: Brox Chen <guochen2@amd.com>
Date: Thu, 9 Jan 2025 13:38:28 -0500
Subject: [PATCH 27/79] [AMDGPU][True16][MC][NFC] add true16/fake16 test for
 gfx12 vop3c/3cx (#122316)

This is a NFC. Duplicate mc test file for gfx12 vop3c/vop3cx to
true16/fake16 mode and update it with +real-true16/-real-true16 flag.

This is for the upcoming true16 changes
---
 .../MC/AMDGPU/gfx12_asm_vop3c_dpp16-fake16.s  | 6647 +++++++++++++++++
 llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s   |    8 +-
 .../MC/AMDGPU/gfx12_asm_vop3c_dpp8-fake16.s   | 2975 ++++++++
 llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s    |    8 +-
 llvm/test/MC/AMDGPU/gfx12_asm_vop3cx-fake16.s | 3414 +++++++++
 llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s        |    4 +-
 .../MC/AMDGPU/gfx12_asm_vop3cx_dpp16-fake16.s | 2595 +++++++
 llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s  |    4 +-
 .../MC/AMDGPU/gfx12_asm_vop3cx_dpp8-fake16.s  |  897 +++
 llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s   |    4 +-
 10 files changed, 16542 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16-fake16.s
 create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8-fake16.s
 create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3cx-fake16.s
 create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16-fake16.s
 create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8-fake16.s

diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16-fake16.s
new file mode 100644
index 0000000000000..ebf537724393f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16-fake16.s
@@ -0,0 +1,6647 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_class_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_class_f16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_class_f16_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x01,0x7d,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+
+v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_class_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_class_f32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_class_f32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_class_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_class_f32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_class_f32_e64_dpp null, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x01,0x7e,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+
+v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_eq_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x02,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x02,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_eq_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x12,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x12,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x12,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x12,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_eq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x12,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_eq_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_eq_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_eq_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_eq_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_eq_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_eq_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x42,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_eq_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_eq_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_eq_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_eq_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_eq_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_eq_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_eq_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_eq_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_eq_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_eq_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_eq_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4a,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ge_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x06,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x06,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ge_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x16,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x16,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x16,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x16,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x16,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_ge_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ge_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ge_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_ge_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ge_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ge_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x46,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_ge_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ge_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ge_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_ge_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ge_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ge_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ge_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ge_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ge_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ge_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ge_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4e,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_gt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x04,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x04,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_gt_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x14,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x14,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x14,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x14,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_gt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x14,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_gt_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_gt_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_gt_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_gt_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_gt_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_gt_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x44,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_gt_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_gt_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_gt_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_gt_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_gt_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_gt_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_gt_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_gt_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_gt_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_gt_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_gt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4c,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_le_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x03,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x03,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_le_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x13,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x13,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x13,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x13,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_le_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x13,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_le_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_le_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_le_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_le_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_le_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_le_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x43,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_le_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_le_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_le_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_le_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_le_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_le_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_le_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_le_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_le_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_le_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_le_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4b,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_lg_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x05,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x05,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_lg_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lg_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lg_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x15,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lg_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x15,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lg_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lg_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x15,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x15,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_lg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x15,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_lt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x01,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x01,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x01,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x01,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_lt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x01,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_lt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_lt_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x11,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x11,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x11,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x11,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_lt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x11,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_lt_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_lt_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lt_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lt_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lt_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lt_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_lt_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_lt_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_lt_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lt_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lt_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lt_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lt_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lt_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lt_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_lt_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x41,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_lt_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_lt_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lt_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lt_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lt_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lt_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_lt_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_lt_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_lt_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_lt_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_lt_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_lt_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_lt_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_lt_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_lt_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_lt_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x49,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_ne_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ne_i16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ne_i16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ne_i16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ne_i16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ne_i16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ne_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_ne_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ne_i32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ne_i32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ne_i32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ne_i32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ne_i32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ne_i32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ne_i32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ne_i32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x45,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_ne_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ne_u16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ne_u16_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ne_u16_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ne_u16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ne_u16_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ne_u16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_ne_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ne_u32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ne_u32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ne_u32_e64_dpp vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ne_u32_e64_dpp ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ne_u32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ne_u32_e64_dpp vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ne_u32_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x00,0x4d,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_neq_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_neq_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_neq_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_neq_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x1d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_neq_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x1d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_neq_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_neq_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x1d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x1d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_nge_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x09,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x09,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_nge_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nge_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nge_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x19,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nge_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x19,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nge_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nge_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x19,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x19,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x19,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ngt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_ngt_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_ngt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_ngt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x1b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_ngt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x1b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_ngt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_ngt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x1b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x1b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_nle_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_nle_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nle_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nle_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x1c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nle_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x1c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nle_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nle_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x1c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x1c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_nlg_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_nlg_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlg_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlg_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x1a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlg_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x1a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlg_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlg_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x1a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x1a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_nlt_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x0e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x0e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_nlt_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_nlt_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_nlt_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x1e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_nlt_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x1e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_nlt_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_nlt_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x1e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x1e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x1e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_o_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x07,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x07,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_o_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_o_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_o_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x17,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_o_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x17,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_o_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_o_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x17,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x17,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x17,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_u_f16_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x08,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x08,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
+// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, v2 row_mirror
+// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, v2 row_half_mirror
+// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, v2 row_shl:1
+// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, v2 row_shl:15
+// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, v2 row_shr:1
+// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, v2 row_shr:15
+// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, v2 row_ror:1
+// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s105, v1, v2 row_ror:15
+// W32: v_cmp_u_f32_e64_dpp s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x69,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: v_cmp_u_f32_e64_dpp vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: v_cmp_u_f32_e64_dpp vcc_hi, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6b,0x01,0x18,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: v_cmp_u_f32_e64_dpp ttmp15, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7b,0x02,0x18,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_mirror
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_half_mirror
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shl:1
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shl:15
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shr:1
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shr:15
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_ror:1
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_ror:15
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: v_cmp_u_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: v_cmp_u_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x18,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: v_cmp_u_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7a,0x02,0x18,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmp_u_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x18,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s
index a508cbd661415..98462309b2a3b 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s
@@ -1,8 +1,8 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
 // W32: v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8-fake16.s
new file mode 100644
index 0000000000000..5bdc27ce1974a
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8-fake16.s
@@ -0,0 +1,2975 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_class_f16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x7d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_class_f16_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x01,0x7d,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_class_f32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x7e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_class_f32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x7e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction
+
+v_cmp_class_f32_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_class_f32_e64_dpp null, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x01,0x7e,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+v_cmp_eq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x02,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x02,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x02,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_eq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x02,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_eq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x12,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x12,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x12,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x12,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_eq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x12,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_eq_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x32,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x32,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x32,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x32,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_eq_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_eq_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x42,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x42,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x42,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x42,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x42,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x42,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_eq_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x42,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_eq_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x3a,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x3a,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_eq_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_eq_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_eq_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x4a,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_eq_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x4a,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_eq_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4a,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_ge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x06,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x06,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x06,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_ge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x16,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x16,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x16,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x16,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x16,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_ge_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x36,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x36,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x36,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x36,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ge_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_ge_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x46,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x46,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x46,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x46,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ge_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x46,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_ge_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x3e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x3e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ge_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_ge_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ge_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x4e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ge_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x4e,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ge_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4e,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_gt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x04,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x04,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_gt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x04,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_gt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x14,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x14,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x14,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x14,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_gt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x14,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_gt_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x34,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x34,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x34,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x34,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_gt_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_gt_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x44,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x44,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x44,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x44,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_gt_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x44,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_gt_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x3c,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x3c,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_gt_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_gt_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_gt_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x4c,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_gt_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x4c,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_gt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4c,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_le_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x03,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x03,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_le_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x03,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_le_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x13,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x13,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x13,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x13,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_le_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x13,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_le_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x33,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x33,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x33,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x33,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_le_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_le_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x43,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x43,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x43,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x43,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x43,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x43,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_le_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x43,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_le_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x3b,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x3b,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_le_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_le_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_le_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x4b,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_le_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x4b,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_le_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_le_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4b,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_lg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x05,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x05,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_lg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x05,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_lg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x15,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lg_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x15,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x15,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lg_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x15,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_lg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x15,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_lt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x01,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x01,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x01,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x01,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x01,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x01,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_lt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x01,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_lt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x11,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x11,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x11,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x11,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x11,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x11,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_lt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x11,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_lt_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lt_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x31,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lt_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x31,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_lt_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_lt_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lt_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x41,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x41,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x41,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lt_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x41,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_lt_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x41,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_lt_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lt_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x39,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x39,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x39,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lt_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x39,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_lt_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_lt_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_lt_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x49,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x49,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x49,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_lt_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x49,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_lt_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x49,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_ne_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ne_i16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x35,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x35,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x35,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ne_i16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x35,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ne_i16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_ne_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ne_i32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x45,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x45,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ne_i32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x45,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ne_i32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x45,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_ne_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ne_u16_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x3d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ne_u16_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x3d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ne_u16_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_ne_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u32_e64_dpp vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ne_u32_e64_dpp ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x00,0x4d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u32_e64_dpp vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ne_u32_e64_dpp ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x00,0x4d,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ne_u32_e64_dpp null, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x00,0x4d,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmp_neq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_neq_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_neq_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_neq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x1d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_neq_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x1d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_neq_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x1d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_neq_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x09,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nge_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x09,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nge_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x09,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x19,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nge_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x19,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x19,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x19,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nge_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x19,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_ngt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ngt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ngt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_ngt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x1b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_ngt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x1b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_ngt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x1b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_ngt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nle_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nle_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nle_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nle_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x1c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nle_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x1c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nle_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x1c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nle_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nlg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlg_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nlg_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nlg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x1a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlg_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x1a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlg_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x1a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nlg_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nlt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x0e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlt_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x0e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nlt_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x0e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_nlt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x1e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_nlt_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x1e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_nlt_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x1e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_nlt_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x1e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_o_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x07,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_o_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x07,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_o_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x07,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_o_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x17,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_o_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x17,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x17,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_o_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x17,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_o_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x17,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_u_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f16_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x08,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_u_f16_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x08,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_u_f16_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x08,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmp_u_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x69,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f32_e64_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f32_e64_dpp vcc_hi, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6b,0x01,0x18,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: v_cmp_u_f32_e64_dpp ttmp15, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7b,0x02,0x18,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x18,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: v_cmp_u_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7a,0x02,0x18,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
+
+v_cmp_u_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmp_u_f32_e64_dpp null, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7c,0x83,0x18,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s
index 96a3d356ae282..4617914b502b3 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s
@@ -1,8 +1,8 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx-fake16.s
new file mode 100644
index 0000000000000..a76830385004b
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx-fake16.s
@@ -0,0 +1,3414 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+
+v_cmpx_class_f16_e64 v1, v2
+// GFX12: v_cmpx_class_f16_e64 v1, v2             ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_class_f16_e64 v255, v2
+// GFX12: v_cmpx_class_f16_e64 v255, v2           ; encoding: [0x7e,0x00,0xfd,0xd4,0xff,0x05,0x02,0x00]
+
+v_cmpx_class_f16_e64 s1, v2
+// GFX12: v_cmpx_class_f16_e64 s1, v2             ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x04,0x02,0x00]
+
+v_cmpx_class_f16_e64 s105, v255
+// GFX12: v_cmpx_class_f16_e64 s105, v255         ; encoding: [0x7e,0x00,0xfd,0xd4,0x69,0xfe,0x03,0x00]
+
+v_cmpx_class_f16_e64 vcc_lo, s2
+// GFX12: v_cmpx_class_f16_e64 vcc_lo, s2         ; encoding: [0x7e,0x00,0xfd,0xd4,0x6a,0x04,0x00,0x00]
+
+v_cmpx_class_f16_e64 vcc_hi, s105
+// GFX12: v_cmpx_class_f16_e64 vcc_hi, s105       ; encoding: [0x7e,0x00,0xfd,0xd4,0x6b,0xd2,0x00,0x00]
+
+v_cmpx_class_f16_e64 ttmp15, ttmp15
+// GFX12: v_cmpx_class_f16_e64 ttmp15, ttmp15     ; encoding: [0x7e,0x00,0xfd,0xd4,0x7b,0xf6,0x00,0x00]
+
+v_cmpx_class_f16_e64 m0, src_scc
+// GFX12: v_cmpx_class_f16_e64 m0, src_scc        ; encoding: [0x7e,0x00,0xfd,0xd4,0x7d,0xfa,0x01,0x00]
+
+v_cmpx_class_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_class_f16_e64 exec_lo, -1        ; encoding: [0x7e,0x00,0xfd,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_class_f16_e64 exec_hi, null
+// GFX12: v_cmpx_class_f16_e64 exec_hi, null      ; encoding: [0x7e,0x00,0xfd,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_class_f16_e64 null, exec_lo
+// GFX12: v_cmpx_class_f16_e64 null, exec_lo      ; encoding: [0x7e,0x00,0xfd,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_class_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_class_f16_e64 -1, exec_hi        ; encoding: [0x7e,0x00,0xfd,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_class_f16_e64 0.5, m0
+// GFX12: v_cmpx_class_f16_e64 0.5, m0            ; encoding: [0x7e,0x00,0xfd,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_class_f16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_class_f16_e64 src_scc, vcc_lo    ; encoding: [0x7e,0x00,0xfd,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_class_f16_e64 -|0xfe0b|, vcc_hi
+// GFX12: v_cmpx_class_f16_e64 -|0xfe0b|, vcc_hi  ; encoding: [0x7e,0x01,0xfd,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_class_f32_e64 v1, v2
+// GFX12: v_cmpx_class_f32_e64 v1, v2             ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_class_f32_e64 v255, v255
+// GFX12: v_cmpx_class_f32_e64 v255, v255         ; encoding: [0x7e,0x00,0xfe,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_class_f32_e64 s1, s2
+// GFX12: v_cmpx_class_f32_e64 s1, s2             ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_class_f32_e64 s105, s105
+// GFX12: v_cmpx_class_f32_e64 s105, s105         ; encoding: [0x7e,0x00,0xfe,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_class_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_class_f32_e64 vcc_lo, ttmp15     ; encoding: [0x7e,0x00,0xfe,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_class_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_class_f32_e64 vcc_hi, 0xaf123456 ; encoding: [0x7e,0x00,0xfe,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_class_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_class_f32_e64 ttmp15, src_scc    ; encoding: [0x7e,0x00,0xfe,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_class_f32_e64 m0, 0.5
+// GFX12: v_cmpx_class_f32_e64 m0, 0.5            ; encoding: [0x7e,0x00,0xfe,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_class_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_class_f32_e64 exec_lo, -1        ; encoding: [0x7e,0x00,0xfe,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_class_f32_e64 exec_hi, null
+// GFX12: v_cmpx_class_f32_e64 exec_hi, null      ; encoding: [0x7e,0x00,0xfe,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_class_f32_e64 null, exec_lo
+// GFX12: v_cmpx_class_f32_e64 null, exec_lo      ; encoding: [0x7e,0x00,0xfe,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_class_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_class_f32_e64 -1, exec_hi        ; encoding: [0x7e,0x00,0xfe,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_class_f32_e64 0.5, m0
+// GFX12: v_cmpx_class_f32_e64 0.5, m0            ; encoding: [0x7e,0x00,0xfe,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_class_f32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_class_f32_e64 src_scc, vcc_lo    ; encoding: [0x7e,0x00,0xfe,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_class_f32_e64 -|0xaf123456|, vcc_hi
+// GFX12: v_cmpx_class_f32_e64 -|0xaf123456|, vcc_hi ; encoding: [0x7e,0x01,0xfe,0xd4,0xff,0xd6,0x00,0x20,0x56,0x34,0x12,0xaf]
+
+v_cmpx_class_f64_e64 v[1:2], v2
+// GFX12: v_cmpx_class_f64_e64 v[1:2], v2         ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_class_f64_e64 v[1:2], v255
+// GFX12: v_cmpx_class_f64_e64 v[1:2], v255       ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xff,0x03,0x00]
+
+v_cmpx_class_f64_e64 v[1:2], s2
+// GFX12: v_cmpx_class_f64_e64 v[1:2], s2         ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0x05,0x00,0x00]
+
+v_cmpx_class_f64_e64 v[1:2], s105
+// GFX12: v_cmpx_class_f64_e64 v[1:2], s105       ; encoding: [0x7e,0x00,0xff,0xd4,0x01,0xd3,0x00,0x00]
+
+v_cmpx_class_f64_e64 v[254:255], ttmp15
+// GFX12: v_cmpx_class_f64_e64 v[254:255], ttmp15 ; encoding: [0x7e,0x00,0xff,0xd4,0xfe,0xf7,0x00,0x00]
+
+v_cmpx_class_f64_e64 s[2:3], vcc_hi
+// GFX12: v_cmpx_class_f64_e64 s[2:3], vcc_hi     ; encoding: [0x7e,0x00,0xff,0xd4,0x02,0xd6,0x00,0x00]
+
+v_cmpx_class_f64_e64 s[104:105], vcc_lo
+// GFX12: v_cmpx_class_f64_e64 s[104:105], vcc_lo ; encoding: [0x7e,0x00,0xff,0xd4,0x68,0xd4,0x00,0x00]
+
+v_cmpx_class_f64_e64 vcc, m0
+// GFX12: v_cmpx_class_f64_e64 vcc, m0            ; encoding: [0x7e,0x00,0xff,0xd4,0x6a,0xfa,0x00,0x00]
+
+v_cmpx_class_f64_e64 ttmp[14:15], exec_hi
+// GFX12: v_cmpx_class_f64_e64 ttmp[14:15], exec_hi ; encoding: [0x7e,0x00,0xff,0xd4,0x7a,0xfe,0x00,0x00]
+
+v_cmpx_class_f64_e64 exec, exec_lo
+// GFX12: v_cmpx_class_f64_e64 exec, exec_lo      ; encoding: [0x7e,0x00,0xff,0xd4,0x7e,0xfc,0x00,0x00]
+
+v_cmpx_class_f64_e64 null, null
+// GFX12: v_cmpx_class_f64_e64 null, null         ; encoding: [0x7e,0x00,0xff,0xd4,0x7c,0xf8,0x00,0x00]
+
+v_cmpx_class_f64_e64 -1, -1
+// GFX12: v_cmpx_class_f64_e64 -1, -1             ; encoding: [0x7e,0x00,0xff,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_class_f64_e64 0.5, 0.5
+// GFX12: v_cmpx_class_f64_e64 0.5, 0.5           ; encoding: [0x7e,0x00,0xff,0xd4,0xf0,0xe0,0x01,0x00]
+
+v_cmpx_class_f64_e64 -|src_scc|, src_scc
+// GFX12: v_cmpx_class_f64_e64 -|src_scc|, src_scc ; encoding: [0x7e,0x01,0xff,0xd4,0xfd,0xfa,0x01,0x20]
+
+v_cmpx_class_f64_e64 0xaf123456, 0xaf123456
+// GFX12: v_cmpx_class_f64_e64 0xaf123456, 0xaf123456 ; encoding: [0x7e,0x00,0xff,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_f16_e64 v1, v2
+// GFX12: v_cmpx_eq_f16_e64 v1, v2                ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_eq_f16_e64 v255, v255
+// GFX12: v_cmpx_eq_f16_e64 v255, v255            ; encoding: [0x7e,0x00,0x82,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_eq_f16_e64 s1, s2
+// GFX12: v_cmpx_eq_f16_e64 s1, s2                ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_eq_f16_e64 s105, s105
+// GFX12: v_cmpx_eq_f16_e64 s105, s105            ; encoding: [0x7e,0x00,0x82,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_eq_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_eq_f16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x82,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_eq_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_eq_f16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0x82,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_eq_f16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x82,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_eq_f16_e64 m0, 0.5
+// GFX12: v_cmpx_eq_f16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x82,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_eq_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_eq_f16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x82,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_eq_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_eq_f16_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x82,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_eq_f16_e64 null, exec_lo
+// GFX12: v_cmpx_eq_f16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x82,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_eq_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_eq_f16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x82,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_eq_f16_e64 0.5, -m0
+// GFX12: v_cmpx_eq_f16_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x82,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_eq_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_eq_f16_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x82,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_eq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_eq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x82,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_f32_e64 v1, v2
+// GFX12: v_cmpx_eq_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_eq_f32_e64 v255, v255
+// GFX12: v_cmpx_eq_f32_e64 v255, v255            ; encoding: [0x7e,0x00,0x92,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_eq_f32_e64 s1, s2
+// GFX12: v_cmpx_eq_f32_e64 s1, s2                ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_eq_f32_e64 s105, s105
+// GFX12: v_cmpx_eq_f32_e64 s105, s105            ; encoding: [0x7e,0x00,0x92,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_eq_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_eq_f32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x92,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_eq_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_eq_f32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0x92,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_eq_f32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x92,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_eq_f32_e64 m0, 0.5
+// GFX12: v_cmpx_eq_f32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x92,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_eq_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_eq_f32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x92,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_eq_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_eq_f32_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x92,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_eq_f32_e64 null, exec_lo
+// GFX12: v_cmpx_eq_f32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x92,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_eq_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_eq_f32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x92,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_eq_f32_e64 0.5, -m0
+// GFX12: v_cmpx_eq_f32_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x92,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_eq_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_eq_f32_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x92,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_eq_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_eq_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x92,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_eq_f64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xa2,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_eq_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_eq_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa2,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_eq_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_eq_f64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xa2,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_eq_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_eq_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa2,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_eq_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_eq_f64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xa2,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_eq_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_eq_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa2,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_eq_f64_e64 -|exec|, src_scc      ; encoding: [0x7e,0x01,0xa2,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_eq_f64_e64 null, 0.5
+// GFX12: v_cmpx_eq_f64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xa2,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_eq_f64_e64 -1, -1
+// GFX12: v_cmpx_eq_f64_e64 -1, -1                ; encoding: [0x7e,0x00,0xa2,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_eq_f64_e64 0.5, null
+// GFX12: v_cmpx_eq_f64_e64 0.5, null             ; encoding: [0x7e,0x00,0xa2,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_eq_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_eq_f64_e64 -|src_scc|, -|exec|   ; encoding: [0x7e,0x03,0xa2,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_eq_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_eq_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa2,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_i16_e64 v1, v2
+// GFX12: v_cmpx_eq_i16_e64 v1, v2                ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_eq_i16_e64 v255, v255
+// GFX12: v_cmpx_eq_i16_e64 v255, v255            ; encoding: [0x7e,0x00,0xb2,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_eq_i16_e64 s1, s2
+// GFX12: v_cmpx_eq_i16_e64 s1, s2                ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_eq_i16_e64 s105, s105
+// GFX12: v_cmpx_eq_i16_e64 s105, s105            ; encoding: [0x7e,0x00,0xb2,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_eq_i16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_eq_i16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xb2,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_eq_i16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_eq_i16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xb2,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_i16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_eq_i16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xb2,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_eq_i16_e64 m0, 0.5
+// GFX12: v_cmpx_eq_i16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xb2,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_eq_i16_e64 exec_lo, -1
+// GFX12: v_cmpx_eq_i16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xb2,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_eq_i16_e64 exec_hi, null
+// GFX12: v_cmpx_eq_i16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xb2,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_eq_i16_e64 null, exec_lo
+// GFX12: v_cmpx_eq_i16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xb2,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_eq_i16_e64 -1, exec_hi
+// GFX12: v_cmpx_eq_i16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xb2,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_eq_i16_e64 0.5, m0
+// GFX12: v_cmpx_eq_i16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xb2,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_eq_i16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_eq_i16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xb2,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_eq_i16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_eq_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb2,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_i32_e64 v1, v2
+// GFX12: v_cmpx_eq_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_eq_i32_e64 v255, v255
+// GFX12: v_cmpx_eq_i32_e64 v255, v255            ; encoding: [0x7e,0x00,0xc2,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_eq_i32_e64 s1, s2
+// GFX12: v_cmpx_eq_i32_e64 s1, s2                ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_eq_i32_e64 s105, s105
+// GFX12: v_cmpx_eq_i32_e64 s105, s105            ; encoding: [0x7e,0x00,0xc2,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_eq_i32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_eq_i32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xc2,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_eq_i32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_eq_i32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xc2,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_i32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_eq_i32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xc2,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_eq_i32_e64 m0, 0.5
+// GFX12: v_cmpx_eq_i32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xc2,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_eq_i32_e64 exec_lo, -1
+// GFX12: v_cmpx_eq_i32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xc2,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_eq_i32_e64 exec_hi, null
+// GFX12: v_cmpx_eq_i32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xc2,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_eq_i32_e64 null, exec_lo
+// GFX12: v_cmpx_eq_i32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xc2,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_eq_i32_e64 -1, exec_hi
+// GFX12: v_cmpx_eq_i32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xc2,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_eq_i32_e64 0.5, m0
+// GFX12: v_cmpx_eq_i32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xc2,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_eq_i32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_eq_i32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xc2,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_eq_i32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_eq_i32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xc2,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_i64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_eq_i64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xd2,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_eq_i64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_eq_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd2,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_eq_i64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_eq_i64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xd2,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_eq_i64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_eq_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd2,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_eq_i64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_eq_i64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xd2,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_eq_i64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_eq_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd2,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_i64_e64 exec, src_scc
+// GFX12: v_cmpx_eq_i64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xd2,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_eq_i64_e64 null, 0.5
+// GFX12: v_cmpx_eq_i64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xd2,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_eq_i64_e64 -1, -1
+// GFX12: v_cmpx_eq_i64_e64 -1, -1                ; encoding: [0x7e,0x00,0xd2,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_eq_i64_e64 0.5, null
+// GFX12: v_cmpx_eq_i64_e64 0.5, null             ; encoding: [0x7e,0x00,0xd2,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_eq_i64_e64 src_scc, exec
+// GFX12: v_cmpx_eq_i64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xd2,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_eq_i64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_eq_i64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xd2,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_u16_e64 v1, v2
+// GFX12: v_cmpx_eq_u16_e64 v1, v2                ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_eq_u16_e64 v255, v255
+// GFX12: v_cmpx_eq_u16_e64 v255, v255            ; encoding: [0x7e,0x00,0xba,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_eq_u16_e64 s1, s2
+// GFX12: v_cmpx_eq_u16_e64 s1, s2                ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_eq_u16_e64 s105, s105
+// GFX12: v_cmpx_eq_u16_e64 s105, s105            ; encoding: [0x7e,0x00,0xba,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_eq_u16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_eq_u16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xba,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_eq_u16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_eq_u16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xba,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_u16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_eq_u16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xba,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_eq_u16_e64 m0, 0.5
+// GFX12: v_cmpx_eq_u16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xba,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_eq_u16_e64 exec_lo, -1
+// GFX12: v_cmpx_eq_u16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xba,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_eq_u16_e64 exec_hi, null
+// GFX12: v_cmpx_eq_u16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xba,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_eq_u16_e64 null, exec_lo
+// GFX12: v_cmpx_eq_u16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xba,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_eq_u16_e64 -1, exec_hi
+// GFX12: v_cmpx_eq_u16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xba,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_eq_u16_e64 0.5, m0
+// GFX12: v_cmpx_eq_u16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xba,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_eq_u16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_eq_u16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xba,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_eq_u16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_eq_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xba,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_u32_e64 v1, v2
+// GFX12: v_cmpx_eq_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_eq_u32_e64 v255, v255
+// GFX12: v_cmpx_eq_u32_e64 v255, v255            ; encoding: [0x7e,0x00,0xca,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_eq_u32_e64 s1, s2
+// GFX12: v_cmpx_eq_u32_e64 s1, s2                ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_eq_u32_e64 s105, s105
+// GFX12: v_cmpx_eq_u32_e64 s105, s105            ; encoding: [0x7e,0x00,0xca,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_eq_u32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_eq_u32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xca,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_eq_u32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_eq_u32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xca,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_u32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_eq_u32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xca,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_eq_u32_e64 m0, 0.5
+// GFX12: v_cmpx_eq_u32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xca,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_eq_u32_e64 exec_lo, -1
+// GFX12: v_cmpx_eq_u32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xca,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_eq_u32_e64 exec_hi, null
+// GFX12: v_cmpx_eq_u32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xca,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_eq_u32_e64 null, exec_lo
+// GFX12: v_cmpx_eq_u32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xca,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_eq_u32_e64 -1, exec_hi
+// GFX12: v_cmpx_eq_u32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xca,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_eq_u32_e64 0.5, m0
+// GFX12: v_cmpx_eq_u32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xca,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_eq_u32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_eq_u32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xca,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_eq_u32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_eq_u32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xca,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_u64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_eq_u64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xda,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_eq_u64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_eq_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xda,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_eq_u64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_eq_u64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xda,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_eq_u64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_eq_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xda,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_eq_u64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_eq_u64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xda,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_eq_u64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_eq_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xda,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_u64_e64 exec, src_scc
+// GFX12: v_cmpx_eq_u64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xda,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_eq_u64_e64 null, 0.5
+// GFX12: v_cmpx_eq_u64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xda,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_eq_u64_e64 -1, -1
+// GFX12: v_cmpx_eq_u64_e64 -1, -1                ; encoding: [0x7e,0x00,0xda,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_eq_u64_e64 0.5, null
+// GFX12: v_cmpx_eq_u64_e64 0.5, null             ; encoding: [0x7e,0x00,0xda,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_eq_u64_e64 src_scc, exec
+// GFX12: v_cmpx_eq_u64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xda,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_eq_u64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_eq_u64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xda,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_f16_e64 v1, v2
+// GFX12: v_cmpx_ge_f16_e64 v1, v2                ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ge_f16_e64 v255, v255
+// GFX12: v_cmpx_ge_f16_e64 v255, v255            ; encoding: [0x7e,0x00,0x86,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ge_f16_e64 s1, s2
+// GFX12: v_cmpx_ge_f16_e64 s1, s2                ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ge_f16_e64 s105, s105
+// GFX12: v_cmpx_ge_f16_e64 s105, s105            ; encoding: [0x7e,0x00,0x86,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ge_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ge_f16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x86,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ge_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_ge_f16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0x86,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ge_f16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x86,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ge_f16_e64 m0, 0.5
+// GFX12: v_cmpx_ge_f16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x86,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ge_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_ge_f16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x86,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ge_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_ge_f16_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x86,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ge_f16_e64 null, exec_lo
+// GFX12: v_cmpx_ge_f16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x86,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ge_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_ge_f16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x86,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ge_f16_e64 0.5, -m0
+// GFX12: v_cmpx_ge_f16_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x86,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_ge_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_ge_f16_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x86,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_ge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_ge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x86,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_f32_e64 v1, v2
+// GFX12: v_cmpx_ge_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ge_f32_e64 v255, v255
+// GFX12: v_cmpx_ge_f32_e64 v255, v255            ; encoding: [0x7e,0x00,0x96,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ge_f32_e64 s1, s2
+// GFX12: v_cmpx_ge_f32_e64 s1, s2                ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ge_f32_e64 s105, s105
+// GFX12: v_cmpx_ge_f32_e64 s105, s105            ; encoding: [0x7e,0x00,0x96,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ge_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ge_f32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x96,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ge_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_ge_f32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0x96,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ge_f32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x96,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ge_f32_e64 m0, 0.5
+// GFX12: v_cmpx_ge_f32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x96,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ge_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_ge_f32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x96,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ge_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_ge_f32_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x96,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ge_f32_e64 null, exec_lo
+// GFX12: v_cmpx_ge_f32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x96,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ge_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_ge_f32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x96,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ge_f32_e64 0.5, -m0
+// GFX12: v_cmpx_ge_f32_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x96,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_ge_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_ge_f32_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x96,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_ge_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_ge_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x96,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_ge_f64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xa6,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ge_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_ge_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa6,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_ge_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_ge_f64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xa6,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_ge_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_ge_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa6,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_ge_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_ge_f64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xa6,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_ge_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_ge_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa6,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_ge_f64_e64 -|exec|, src_scc      ; encoding: [0x7e,0x01,0xa6,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_ge_f64_e64 null, 0.5
+// GFX12: v_cmpx_ge_f64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xa6,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_ge_f64_e64 -1, -1
+// GFX12: v_cmpx_ge_f64_e64 -1, -1                ; encoding: [0x7e,0x00,0xa6,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_ge_f64_e64 0.5, null
+// GFX12: v_cmpx_ge_f64_e64 0.5, null             ; encoding: [0x7e,0x00,0xa6,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_ge_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_ge_f64_e64 -|src_scc|, -|exec|   ; encoding: [0x7e,0x03,0xa6,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_ge_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_ge_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa6,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_i16_e64 v1, v2
+// GFX12: v_cmpx_ge_i16_e64 v1, v2                ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ge_i16_e64 v255, v255
+// GFX12: v_cmpx_ge_i16_e64 v255, v255            ; encoding: [0x7e,0x00,0xb6,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ge_i16_e64 s1, s2
+// GFX12: v_cmpx_ge_i16_e64 s1, s2                ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ge_i16_e64 s105, s105
+// GFX12: v_cmpx_ge_i16_e64 s105, s105            ; encoding: [0x7e,0x00,0xb6,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ge_i16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ge_i16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xb6,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ge_i16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_ge_i16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xb6,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_i16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ge_i16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xb6,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ge_i16_e64 m0, 0.5
+// GFX12: v_cmpx_ge_i16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xb6,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ge_i16_e64 exec_lo, -1
+// GFX12: v_cmpx_ge_i16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xb6,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ge_i16_e64 exec_hi, null
+// GFX12: v_cmpx_ge_i16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xb6,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ge_i16_e64 null, exec_lo
+// GFX12: v_cmpx_ge_i16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xb6,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ge_i16_e64 -1, exec_hi
+// GFX12: v_cmpx_ge_i16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xb6,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ge_i16_e64 0.5, m0
+// GFX12: v_cmpx_ge_i16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xb6,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_ge_i16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_ge_i16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xb6,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_ge_i16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_ge_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb6,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_i32_e64 v1, v2
+// GFX12: v_cmpx_ge_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ge_i32_e64 v255, v255
+// GFX12: v_cmpx_ge_i32_e64 v255, v255            ; encoding: [0x7e,0x00,0xc6,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ge_i32_e64 s1, s2
+// GFX12: v_cmpx_ge_i32_e64 s1, s2                ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ge_i32_e64 s105, s105
+// GFX12: v_cmpx_ge_i32_e64 s105, s105            ; encoding: [0x7e,0x00,0xc6,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ge_i32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ge_i32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xc6,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ge_i32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_ge_i32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xc6,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_i32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ge_i32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xc6,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ge_i32_e64 m0, 0.5
+// GFX12: v_cmpx_ge_i32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xc6,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ge_i32_e64 exec_lo, -1
+// GFX12: v_cmpx_ge_i32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xc6,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ge_i32_e64 exec_hi, null
+// GFX12: v_cmpx_ge_i32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xc6,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ge_i32_e64 null, exec_lo
+// GFX12: v_cmpx_ge_i32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xc6,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ge_i32_e64 -1, exec_hi
+// GFX12: v_cmpx_ge_i32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xc6,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ge_i32_e64 0.5, m0
+// GFX12: v_cmpx_ge_i32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xc6,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_ge_i32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_ge_i32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xc6,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_ge_i32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_ge_i32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xc6,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_i64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_ge_i64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xd6,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ge_i64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_ge_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd6,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_ge_i64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_ge_i64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xd6,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_ge_i64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_ge_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd6,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_ge_i64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_ge_i64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xd6,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_ge_i64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_ge_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd6,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_i64_e64 exec, src_scc
+// GFX12: v_cmpx_ge_i64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xd6,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_ge_i64_e64 null, 0.5
+// GFX12: v_cmpx_ge_i64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xd6,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_ge_i64_e64 -1, -1
+// GFX12: v_cmpx_ge_i64_e64 -1, -1                ; encoding: [0x7e,0x00,0xd6,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_ge_i64_e64 0.5, null
+// GFX12: v_cmpx_ge_i64_e64 0.5, null             ; encoding: [0x7e,0x00,0xd6,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_ge_i64_e64 src_scc, exec
+// GFX12: v_cmpx_ge_i64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xd6,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_ge_i64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_ge_i64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xd6,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_u16_e64 v1, v2
+// GFX12: v_cmpx_ge_u16_e64 v1, v2                ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ge_u16_e64 v255, v255
+// GFX12: v_cmpx_ge_u16_e64 v255, v255            ; encoding: [0x7e,0x00,0xbe,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ge_u16_e64 s1, s2
+// GFX12: v_cmpx_ge_u16_e64 s1, s2                ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ge_u16_e64 s105, s105
+// GFX12: v_cmpx_ge_u16_e64 s105, s105            ; encoding: [0x7e,0x00,0xbe,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ge_u16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ge_u16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xbe,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ge_u16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_ge_u16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xbe,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_u16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ge_u16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xbe,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ge_u16_e64 m0, 0.5
+// GFX12: v_cmpx_ge_u16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xbe,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ge_u16_e64 exec_lo, -1
+// GFX12: v_cmpx_ge_u16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xbe,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ge_u16_e64 exec_hi, null
+// GFX12: v_cmpx_ge_u16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xbe,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ge_u16_e64 null, exec_lo
+// GFX12: v_cmpx_ge_u16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xbe,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ge_u16_e64 -1, exec_hi
+// GFX12: v_cmpx_ge_u16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xbe,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ge_u16_e64 0.5, m0
+// GFX12: v_cmpx_ge_u16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xbe,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_ge_u16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_ge_u16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xbe,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_ge_u16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_ge_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbe,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_u32_e64 v1, v2
+// GFX12: v_cmpx_ge_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ge_u32_e64 v255, v255
+// GFX12: v_cmpx_ge_u32_e64 v255, v255            ; encoding: [0x7e,0x00,0xce,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ge_u32_e64 s1, s2
+// GFX12: v_cmpx_ge_u32_e64 s1, s2                ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ge_u32_e64 s105, s105
+// GFX12: v_cmpx_ge_u32_e64 s105, s105            ; encoding: [0x7e,0x00,0xce,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ge_u32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ge_u32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xce,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ge_u32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_ge_u32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xce,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_u32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ge_u32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xce,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ge_u32_e64 m0, 0.5
+// GFX12: v_cmpx_ge_u32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xce,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ge_u32_e64 exec_lo, -1
+// GFX12: v_cmpx_ge_u32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xce,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ge_u32_e64 exec_hi, null
+// GFX12: v_cmpx_ge_u32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xce,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ge_u32_e64 null, exec_lo
+// GFX12: v_cmpx_ge_u32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xce,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ge_u32_e64 -1, exec_hi
+// GFX12: v_cmpx_ge_u32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xce,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ge_u32_e64 0.5, m0
+// GFX12: v_cmpx_ge_u32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xce,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_ge_u32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_ge_u32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xce,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_ge_u32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_ge_u32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xce,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_u64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_ge_u64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xde,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ge_u64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_ge_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xde,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_ge_u64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_ge_u64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xde,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_ge_u64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_ge_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xde,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_ge_u64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_ge_u64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xde,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_ge_u64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_ge_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xde,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_u64_e64 exec, src_scc
+// GFX12: v_cmpx_ge_u64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xde,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_ge_u64_e64 null, 0.5
+// GFX12: v_cmpx_ge_u64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xde,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_ge_u64_e64 -1, -1
+// GFX12: v_cmpx_ge_u64_e64 -1, -1                ; encoding: [0x7e,0x00,0xde,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_ge_u64_e64 0.5, null
+// GFX12: v_cmpx_ge_u64_e64 0.5, null             ; encoding: [0x7e,0x00,0xde,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_ge_u64_e64 src_scc, exec
+// GFX12: v_cmpx_ge_u64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xde,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_ge_u64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_ge_u64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xde,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_f16_e64 v1, v2
+// GFX12: v_cmpx_gt_f16_e64 v1, v2                ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_gt_f16_e64 v255, v255
+// GFX12: v_cmpx_gt_f16_e64 v255, v255            ; encoding: [0x7e,0x00,0x84,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_gt_f16_e64 s1, s2
+// GFX12: v_cmpx_gt_f16_e64 s1, s2                ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_gt_f16_e64 s105, s105
+// GFX12: v_cmpx_gt_f16_e64 s105, s105            ; encoding: [0x7e,0x00,0x84,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_gt_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_gt_f16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x84,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_gt_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_gt_f16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0x84,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_gt_f16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x84,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_gt_f16_e64 m0, 0.5
+// GFX12: v_cmpx_gt_f16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x84,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_gt_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_gt_f16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x84,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_gt_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_gt_f16_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x84,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_gt_f16_e64 null, exec_lo
+// GFX12: v_cmpx_gt_f16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x84,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_gt_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_gt_f16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x84,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_gt_f16_e64 0.5, -m0
+// GFX12: v_cmpx_gt_f16_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x84,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_gt_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_gt_f16_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x84,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_gt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_gt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x84,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_f32_e64 v1, v2
+// GFX12: v_cmpx_gt_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_gt_f32_e64 v255, v255
+// GFX12: v_cmpx_gt_f32_e64 v255, v255            ; encoding: [0x7e,0x00,0x94,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_gt_f32_e64 s1, s2
+// GFX12: v_cmpx_gt_f32_e64 s1, s2                ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_gt_f32_e64 s105, s105
+// GFX12: v_cmpx_gt_f32_e64 s105, s105            ; encoding: [0x7e,0x00,0x94,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_gt_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_gt_f32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x94,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_gt_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_gt_f32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0x94,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_gt_f32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x94,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_gt_f32_e64 m0, 0.5
+// GFX12: v_cmpx_gt_f32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x94,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_gt_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_gt_f32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x94,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_gt_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_gt_f32_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x94,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_gt_f32_e64 null, exec_lo
+// GFX12: v_cmpx_gt_f32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x94,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_gt_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_gt_f32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x94,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_gt_f32_e64 0.5, -m0
+// GFX12: v_cmpx_gt_f32_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x94,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_gt_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_gt_f32_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x94,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_gt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_gt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x94,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_gt_f64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xa4,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_gt_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_gt_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa4,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_gt_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_gt_f64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xa4,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_gt_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_gt_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa4,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_gt_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_gt_f64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xa4,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_gt_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_gt_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa4,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_gt_f64_e64 -|exec|, src_scc      ; encoding: [0x7e,0x01,0xa4,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_gt_f64_e64 null, 0.5
+// GFX12: v_cmpx_gt_f64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xa4,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_gt_f64_e64 -1, -1
+// GFX12: v_cmpx_gt_f64_e64 -1, -1                ; encoding: [0x7e,0x00,0xa4,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_gt_f64_e64 0.5, null
+// GFX12: v_cmpx_gt_f64_e64 0.5, null             ; encoding: [0x7e,0x00,0xa4,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_gt_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_gt_f64_e64 -|src_scc|, -|exec|   ; encoding: [0x7e,0x03,0xa4,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_gt_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_gt_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa4,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_i16_e64 v1, v2
+// GFX12: v_cmpx_gt_i16_e64 v1, v2                ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_gt_i16_e64 v255, v255
+// GFX12: v_cmpx_gt_i16_e64 v255, v255            ; encoding: [0x7e,0x00,0xb4,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_gt_i16_e64 s1, s2
+// GFX12: v_cmpx_gt_i16_e64 s1, s2                ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_gt_i16_e64 s105, s105
+// GFX12: v_cmpx_gt_i16_e64 s105, s105            ; encoding: [0x7e,0x00,0xb4,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_gt_i16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_gt_i16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xb4,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_gt_i16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_gt_i16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xb4,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_i16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_gt_i16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xb4,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_gt_i16_e64 m0, 0.5
+// GFX12: v_cmpx_gt_i16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xb4,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_gt_i16_e64 exec_lo, -1
+// GFX12: v_cmpx_gt_i16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xb4,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_gt_i16_e64 exec_hi, null
+// GFX12: v_cmpx_gt_i16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xb4,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_gt_i16_e64 null, exec_lo
+// GFX12: v_cmpx_gt_i16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xb4,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_gt_i16_e64 -1, exec_hi
+// GFX12: v_cmpx_gt_i16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xb4,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_gt_i16_e64 0.5, m0
+// GFX12: v_cmpx_gt_i16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xb4,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_gt_i16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_gt_i16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xb4,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_gt_i16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_gt_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb4,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_i32_e64 v1, v2
+// GFX12: v_cmpx_gt_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_gt_i32_e64 v255, v255
+// GFX12: v_cmpx_gt_i32_e64 v255, v255            ; encoding: [0x7e,0x00,0xc4,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_gt_i32_e64 s1, s2
+// GFX12: v_cmpx_gt_i32_e64 s1, s2                ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_gt_i32_e64 s105, s105
+// GFX12: v_cmpx_gt_i32_e64 s105, s105            ; encoding: [0x7e,0x00,0xc4,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_gt_i32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_gt_i32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xc4,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_gt_i32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_gt_i32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xc4,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_i32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_gt_i32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xc4,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_gt_i32_e64 m0, 0.5
+// GFX12: v_cmpx_gt_i32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xc4,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_gt_i32_e64 exec_lo, -1
+// GFX12: v_cmpx_gt_i32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xc4,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_gt_i32_e64 exec_hi, null
+// GFX12: v_cmpx_gt_i32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xc4,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_gt_i32_e64 null, exec_lo
+// GFX12: v_cmpx_gt_i32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xc4,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_gt_i32_e64 -1, exec_hi
+// GFX12: v_cmpx_gt_i32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xc4,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_gt_i32_e64 0.5, m0
+// GFX12: v_cmpx_gt_i32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xc4,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_gt_i32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_gt_i32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xc4,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_gt_i32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_gt_i32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xc4,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_i64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_gt_i64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xd4,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_gt_i64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_gt_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd4,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_gt_i64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_gt_i64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xd4,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_gt_i64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_gt_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd4,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_gt_i64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_gt_i64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xd4,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_gt_i64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_gt_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd4,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_i64_e64 exec, src_scc
+// GFX12: v_cmpx_gt_i64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xd4,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_gt_i64_e64 null, 0.5
+// GFX12: v_cmpx_gt_i64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xd4,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_gt_i64_e64 -1, -1
+// GFX12: v_cmpx_gt_i64_e64 -1, -1                ; encoding: [0x7e,0x00,0xd4,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_gt_i64_e64 0.5, null
+// GFX12: v_cmpx_gt_i64_e64 0.5, null             ; encoding: [0x7e,0x00,0xd4,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_gt_i64_e64 src_scc, exec
+// GFX12: v_cmpx_gt_i64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xd4,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_gt_i64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_gt_i64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xd4,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_u16_e64 v1, v2
+// GFX12: v_cmpx_gt_u16_e64 v1, v2                ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_gt_u16_e64 v255, v255
+// GFX12: v_cmpx_gt_u16_e64 v255, v255            ; encoding: [0x7e,0x00,0xbc,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_gt_u16_e64 s1, s2
+// GFX12: v_cmpx_gt_u16_e64 s1, s2                ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_gt_u16_e64 s105, s105
+// GFX12: v_cmpx_gt_u16_e64 s105, s105            ; encoding: [0x7e,0x00,0xbc,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_gt_u16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_gt_u16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xbc,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_gt_u16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_gt_u16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xbc,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_u16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_gt_u16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xbc,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_gt_u16_e64 m0, 0.5
+// GFX12: v_cmpx_gt_u16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xbc,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_gt_u16_e64 exec_lo, -1
+// GFX12: v_cmpx_gt_u16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xbc,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_gt_u16_e64 exec_hi, null
+// GFX12: v_cmpx_gt_u16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xbc,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_gt_u16_e64 null, exec_lo
+// GFX12: v_cmpx_gt_u16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xbc,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_gt_u16_e64 -1, exec_hi
+// GFX12: v_cmpx_gt_u16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xbc,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_gt_u16_e64 0.5, m0
+// GFX12: v_cmpx_gt_u16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xbc,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_gt_u16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_gt_u16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xbc,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_gt_u16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_gt_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbc,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_u32_e64 v1, v2
+// GFX12: v_cmpx_gt_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_gt_u32_e64 v255, v255
+// GFX12: v_cmpx_gt_u32_e64 v255, v255            ; encoding: [0x7e,0x00,0xcc,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_gt_u32_e64 s1, s2
+// GFX12: v_cmpx_gt_u32_e64 s1, s2                ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_gt_u32_e64 s105, s105
+// GFX12: v_cmpx_gt_u32_e64 s105, s105            ; encoding: [0x7e,0x00,0xcc,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_gt_u32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_gt_u32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xcc,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_gt_u32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_gt_u32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xcc,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_u32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_gt_u32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xcc,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_gt_u32_e64 m0, 0.5
+// GFX12: v_cmpx_gt_u32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xcc,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_gt_u32_e64 exec_lo, -1
+// GFX12: v_cmpx_gt_u32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xcc,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_gt_u32_e64 exec_hi, null
+// GFX12: v_cmpx_gt_u32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xcc,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_gt_u32_e64 null, exec_lo
+// GFX12: v_cmpx_gt_u32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xcc,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_gt_u32_e64 -1, exec_hi
+// GFX12: v_cmpx_gt_u32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xcc,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_gt_u32_e64 0.5, m0
+// GFX12: v_cmpx_gt_u32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xcc,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_gt_u32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_gt_u32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xcc,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_gt_u32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_gt_u32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xcc,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_u64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_gt_u64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xdc,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_gt_u64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_gt_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xdc,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_gt_u64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_gt_u64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xdc,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_gt_u64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_gt_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xdc,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_gt_u64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_gt_u64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xdc,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_gt_u64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_gt_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xdc,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_u64_e64 exec, src_scc
+// GFX12: v_cmpx_gt_u64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xdc,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_gt_u64_e64 null, 0.5
+// GFX12: v_cmpx_gt_u64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xdc,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_gt_u64_e64 -1, -1
+// GFX12: v_cmpx_gt_u64_e64 -1, -1                ; encoding: [0x7e,0x00,0xdc,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_gt_u64_e64 0.5, null
+// GFX12: v_cmpx_gt_u64_e64 0.5, null             ; encoding: [0x7e,0x00,0xdc,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_gt_u64_e64 src_scc, exec
+// GFX12: v_cmpx_gt_u64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xdc,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_gt_u64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_gt_u64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xdc,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_f16_e64 v1, v2
+// GFX12: v_cmpx_le_f16_e64 v1, v2                ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_le_f16_e64 v255, v255
+// GFX12: v_cmpx_le_f16_e64 v255, v255            ; encoding: [0x7e,0x00,0x83,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_le_f16_e64 s1, s2
+// GFX12: v_cmpx_le_f16_e64 s1, s2                ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_le_f16_e64 s105, s105
+// GFX12: v_cmpx_le_f16_e64 s105, s105            ; encoding: [0x7e,0x00,0x83,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_le_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_le_f16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x83,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_le_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_le_f16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0x83,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_le_f16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x83,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_le_f16_e64 m0, 0.5
+// GFX12: v_cmpx_le_f16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x83,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_le_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_le_f16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x83,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_le_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_le_f16_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x83,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_le_f16_e64 null, exec_lo
+// GFX12: v_cmpx_le_f16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x83,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_le_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_le_f16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x83,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_le_f16_e64 0.5, -m0
+// GFX12: v_cmpx_le_f16_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x83,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_le_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_le_f16_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x83,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_le_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_le_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x83,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_f32_e64 v1, v2
+// GFX12: v_cmpx_le_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_le_f32_e64 v255, v255
+// GFX12: v_cmpx_le_f32_e64 v255, v255            ; encoding: [0x7e,0x00,0x93,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_le_f32_e64 s1, s2
+// GFX12: v_cmpx_le_f32_e64 s1, s2                ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_le_f32_e64 s105, s105
+// GFX12: v_cmpx_le_f32_e64 s105, s105            ; encoding: [0x7e,0x00,0x93,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_le_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_le_f32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x93,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_le_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_le_f32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0x93,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_le_f32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x93,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_le_f32_e64 m0, 0.5
+// GFX12: v_cmpx_le_f32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x93,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_le_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_le_f32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x93,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_le_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_le_f32_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x93,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_le_f32_e64 null, exec_lo
+// GFX12: v_cmpx_le_f32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x93,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_le_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_le_f32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x93,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_le_f32_e64 0.5, -m0
+// GFX12: v_cmpx_le_f32_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x93,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_le_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_le_f32_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x93,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_le_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_le_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x93,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_le_f64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xa3,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_le_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_le_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa3,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_le_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_le_f64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xa3,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_le_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_le_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa3,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_le_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_le_f64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xa3,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_le_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_le_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa3,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_le_f64_e64 -|exec|, src_scc      ; encoding: [0x7e,0x01,0xa3,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_le_f64_e64 null, 0.5
+// GFX12: v_cmpx_le_f64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xa3,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_le_f64_e64 -1, -1
+// GFX12: v_cmpx_le_f64_e64 -1, -1                ; encoding: [0x7e,0x00,0xa3,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_le_f64_e64 0.5, null
+// GFX12: v_cmpx_le_f64_e64 0.5, null             ; encoding: [0x7e,0x00,0xa3,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_le_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_le_f64_e64 -|src_scc|, -|exec|   ; encoding: [0x7e,0x03,0xa3,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_le_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_le_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa3,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_i16_e64 v1, v2
+// GFX12: v_cmpx_le_i16_e64 v1, v2                ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_le_i16_e64 v255, v255
+// GFX12: v_cmpx_le_i16_e64 v255, v255            ; encoding: [0x7e,0x00,0xb3,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_le_i16_e64 s1, s2
+// GFX12: v_cmpx_le_i16_e64 s1, s2                ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_le_i16_e64 s105, s105
+// GFX12: v_cmpx_le_i16_e64 s105, s105            ; encoding: [0x7e,0x00,0xb3,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_le_i16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_le_i16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xb3,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_le_i16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_le_i16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xb3,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_i16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_le_i16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xb3,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_le_i16_e64 m0, 0.5
+// GFX12: v_cmpx_le_i16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xb3,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_le_i16_e64 exec_lo, -1
+// GFX12: v_cmpx_le_i16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xb3,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_le_i16_e64 exec_hi, null
+// GFX12: v_cmpx_le_i16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xb3,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_le_i16_e64 null, exec_lo
+// GFX12: v_cmpx_le_i16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xb3,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_le_i16_e64 -1, exec_hi
+// GFX12: v_cmpx_le_i16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xb3,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_le_i16_e64 0.5, m0
+// GFX12: v_cmpx_le_i16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xb3,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_le_i16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_le_i16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xb3,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_le_i16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_le_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb3,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_i32_e64 v1, v2
+// GFX12: v_cmpx_le_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_le_i32_e64 v255, v255
+// GFX12: v_cmpx_le_i32_e64 v255, v255            ; encoding: [0x7e,0x00,0xc3,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_le_i32_e64 s1, s2
+// GFX12: v_cmpx_le_i32_e64 s1, s2                ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_le_i32_e64 s105, s105
+// GFX12: v_cmpx_le_i32_e64 s105, s105            ; encoding: [0x7e,0x00,0xc3,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_le_i32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_le_i32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xc3,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_le_i32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_le_i32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xc3,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_i32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_le_i32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xc3,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_le_i32_e64 m0, 0.5
+// GFX12: v_cmpx_le_i32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xc3,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_le_i32_e64 exec_lo, -1
+// GFX12: v_cmpx_le_i32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xc3,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_le_i32_e64 exec_hi, null
+// GFX12: v_cmpx_le_i32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xc3,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_le_i32_e64 null, exec_lo
+// GFX12: v_cmpx_le_i32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xc3,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_le_i32_e64 -1, exec_hi
+// GFX12: v_cmpx_le_i32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xc3,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_le_i32_e64 0.5, m0
+// GFX12: v_cmpx_le_i32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xc3,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_le_i32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_le_i32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xc3,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_le_i32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_le_i32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xc3,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_i64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_le_i64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xd3,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_le_i64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_le_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd3,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_le_i64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_le_i64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xd3,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_le_i64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_le_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd3,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_le_i64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_le_i64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xd3,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_le_i64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_le_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd3,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_i64_e64 exec, src_scc
+// GFX12: v_cmpx_le_i64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xd3,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_le_i64_e64 null, 0.5
+// GFX12: v_cmpx_le_i64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xd3,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_le_i64_e64 -1, -1
+// GFX12: v_cmpx_le_i64_e64 -1, -1                ; encoding: [0x7e,0x00,0xd3,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_le_i64_e64 0.5, null
+// GFX12: v_cmpx_le_i64_e64 0.5, null             ; encoding: [0x7e,0x00,0xd3,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_le_i64_e64 src_scc, exec
+// GFX12: v_cmpx_le_i64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xd3,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_le_i64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_le_i64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xd3,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_u16_e64 v1, v2
+// GFX12: v_cmpx_le_u16_e64 v1, v2                ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_le_u16_e64 v255, v255
+// GFX12: v_cmpx_le_u16_e64 v255, v255            ; encoding: [0x7e,0x00,0xbb,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_le_u16_e64 s1, s2
+// GFX12: v_cmpx_le_u16_e64 s1, s2                ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_le_u16_e64 s105, s105
+// GFX12: v_cmpx_le_u16_e64 s105, s105            ; encoding: [0x7e,0x00,0xbb,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_le_u16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_le_u16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xbb,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_le_u16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_le_u16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xbb,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_u16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_le_u16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xbb,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_le_u16_e64 m0, 0.5
+// GFX12: v_cmpx_le_u16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xbb,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_le_u16_e64 exec_lo, -1
+// GFX12: v_cmpx_le_u16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xbb,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_le_u16_e64 exec_hi, null
+// GFX12: v_cmpx_le_u16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xbb,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_le_u16_e64 null, exec_lo
+// GFX12: v_cmpx_le_u16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xbb,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_le_u16_e64 -1, exec_hi
+// GFX12: v_cmpx_le_u16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xbb,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_le_u16_e64 0.5, m0
+// GFX12: v_cmpx_le_u16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xbb,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_le_u16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_le_u16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xbb,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_le_u16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_le_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbb,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_u32_e64 v1, v2
+// GFX12: v_cmpx_le_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_le_u32_e64 v255, v255
+// GFX12: v_cmpx_le_u32_e64 v255, v255            ; encoding: [0x7e,0x00,0xcb,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_le_u32_e64 s1, s2
+// GFX12: v_cmpx_le_u32_e64 s1, s2                ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_le_u32_e64 s105, s105
+// GFX12: v_cmpx_le_u32_e64 s105, s105            ; encoding: [0x7e,0x00,0xcb,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_le_u32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_le_u32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xcb,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_le_u32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_le_u32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xcb,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_u32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_le_u32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xcb,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_le_u32_e64 m0, 0.5
+// GFX12: v_cmpx_le_u32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xcb,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_le_u32_e64 exec_lo, -1
+// GFX12: v_cmpx_le_u32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xcb,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_le_u32_e64 exec_hi, null
+// GFX12: v_cmpx_le_u32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xcb,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_le_u32_e64 null, exec_lo
+// GFX12: v_cmpx_le_u32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xcb,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_le_u32_e64 -1, exec_hi
+// GFX12: v_cmpx_le_u32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xcb,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_le_u32_e64 0.5, m0
+// GFX12: v_cmpx_le_u32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xcb,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_le_u32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_le_u32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xcb,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_le_u32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_le_u32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xcb,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_u64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_le_u64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xdb,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_le_u64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_le_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xdb,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_le_u64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_le_u64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xdb,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_le_u64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_le_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xdb,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_le_u64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_le_u64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xdb,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_le_u64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_le_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xdb,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_u64_e64 exec, src_scc
+// GFX12: v_cmpx_le_u64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xdb,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_le_u64_e64 null, 0.5
+// GFX12: v_cmpx_le_u64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xdb,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_le_u64_e64 -1, -1
+// GFX12: v_cmpx_le_u64_e64 -1, -1                ; encoding: [0x7e,0x00,0xdb,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_le_u64_e64 0.5, null
+// GFX12: v_cmpx_le_u64_e64 0.5, null             ; encoding: [0x7e,0x00,0xdb,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_le_u64_e64 src_scc, exec
+// GFX12: v_cmpx_le_u64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xdb,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_le_u64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_le_u64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xdb,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lg_f16_e64 v1, v2
+// GFX12: v_cmpx_lg_f16_e64 v1, v2                ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lg_f16_e64 v255, v255
+// GFX12: v_cmpx_lg_f16_e64 v255, v255            ; encoding: [0x7e,0x00,0x85,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_lg_f16_e64 s1, s2
+// GFX12: v_cmpx_lg_f16_e64 s1, s2                ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_lg_f16_e64 s105, s105
+// GFX12: v_cmpx_lg_f16_e64 s105, s105            ; encoding: [0x7e,0x00,0x85,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_lg_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_lg_f16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x85,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_lg_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_lg_f16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0x85,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lg_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_lg_f16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x85,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_lg_f16_e64 m0, 0.5
+// GFX12: v_cmpx_lg_f16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x85,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_lg_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_lg_f16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x85,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_lg_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_lg_f16_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x85,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_lg_f16_e64 null, exec_lo
+// GFX12: v_cmpx_lg_f16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x85,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_lg_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_lg_f16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x85,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_lg_f16_e64 0.5, -m0
+// GFX12: v_cmpx_lg_f16_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x85,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_lg_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_lg_f16_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x85,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_lg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_lg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x85,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lg_f32_e64 v1, v2
+// GFX12: v_cmpx_lg_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lg_f32_e64 v255, v255
+// GFX12: v_cmpx_lg_f32_e64 v255, v255            ; encoding: [0x7e,0x00,0x95,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_lg_f32_e64 s1, s2
+// GFX12: v_cmpx_lg_f32_e64 s1, s2                ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_lg_f32_e64 s105, s105
+// GFX12: v_cmpx_lg_f32_e64 s105, s105            ; encoding: [0x7e,0x00,0x95,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_lg_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_lg_f32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x95,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_lg_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_lg_f32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0x95,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lg_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_lg_f32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x95,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_lg_f32_e64 m0, 0.5
+// GFX12: v_cmpx_lg_f32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x95,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_lg_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_lg_f32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x95,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_lg_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_lg_f32_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x95,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_lg_f32_e64 null, exec_lo
+// GFX12: v_cmpx_lg_f32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x95,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_lg_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_lg_f32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x95,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_lg_f32_e64 0.5, -m0
+// GFX12: v_cmpx_lg_f32_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x95,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_lg_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_lg_f32_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x95,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_lg_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_lg_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x95,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lg_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_lg_f64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xa5,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lg_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_lg_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa5,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_lg_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_lg_f64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xa5,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_lg_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_lg_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa5,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_lg_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_lg_f64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xa5,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_lg_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_lg_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa5,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lg_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_lg_f64_e64 -|exec|, src_scc      ; encoding: [0x7e,0x01,0xa5,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_lg_f64_e64 null, 0.5
+// GFX12: v_cmpx_lg_f64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xa5,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_lg_f64_e64 -1, -1
+// GFX12: v_cmpx_lg_f64_e64 -1, -1                ; encoding: [0x7e,0x00,0xa5,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_lg_f64_e64 0.5, null
+// GFX12: v_cmpx_lg_f64_e64 0.5, null             ; encoding: [0x7e,0x00,0xa5,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_lg_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_lg_f64_e64 -|src_scc|, -|exec|   ; encoding: [0x7e,0x03,0xa5,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_lg_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_lg_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa5,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_f16_e64 v1, v2
+// GFX12: v_cmpx_lt_f16_e64 v1, v2                ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lt_f16_e64 v255, v255
+// GFX12: v_cmpx_lt_f16_e64 v255, v255            ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_lt_f16_e64 s1, s2
+// GFX12: v_cmpx_lt_f16_e64 s1, s2                ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_lt_f16_e64 s105, s105
+// GFX12: v_cmpx_lt_f16_e64 s105, s105            ; encoding: [0x7e,0x00,0x81,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_lt_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_lt_f16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x81,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_lt_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_lt_f16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0x81,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_lt_f16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x81,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_lt_f16_e64 m0, 0.5
+// GFX12: v_cmpx_lt_f16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x81,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_lt_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_lt_f16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x81,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_lt_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_lt_f16_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x81,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_lt_f16_e64 null, exec_lo
+// GFX12: v_cmpx_lt_f16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x81,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_lt_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_lt_f16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x81,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_lt_f16_e64 0.5, -m0
+// GFX12: v_cmpx_lt_f16_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x81,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_lt_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_lt_f16_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x81,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_f32_e64 v1, v2
+// GFX12: v_cmpx_lt_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lt_f32_e64 v255, v255
+// GFX12: v_cmpx_lt_f32_e64 v255, v255            ; encoding: [0x7e,0x00,0x91,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_lt_f32_e64 s1, s2
+// GFX12: v_cmpx_lt_f32_e64 s1, s2                ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_lt_f32_e64 s105, s105
+// GFX12: v_cmpx_lt_f32_e64 s105, s105            ; encoding: [0x7e,0x00,0x91,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_lt_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_lt_f32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0x91,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_lt_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_lt_f32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0x91,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_lt_f32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0x91,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_lt_f32_e64 m0, 0.5
+// GFX12: v_cmpx_lt_f32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0x91,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_lt_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_lt_f32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0x91,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_lt_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_lt_f32_e64 |exec_hi|, null       ; encoding: [0x7e,0x01,0x91,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_lt_f32_e64 null, exec_lo
+// GFX12: v_cmpx_lt_f32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0x91,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_lt_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_lt_f32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0x91,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_lt_f32_e64 0.5, -m0
+// GFX12: v_cmpx_lt_f32_e64 0.5, -m0              ; encoding: [0x7e,0x00,0x91,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_lt_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_lt_f32_e64 -src_scc, |vcc_lo|    ; encoding: [0x7e,0x02,0x91,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_lt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_lt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x91,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_lt_f64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xa1,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lt_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_lt_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa1,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_lt_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_lt_f64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xa1,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_lt_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_lt_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa1,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_lt_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_lt_f64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xa1,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_lt_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_lt_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa1,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_lt_f64_e64 -|exec|, src_scc      ; encoding: [0x7e,0x01,0xa1,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_lt_f64_e64 null, 0.5
+// GFX12: v_cmpx_lt_f64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xa1,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_lt_f64_e64 -1, -1
+// GFX12: v_cmpx_lt_f64_e64 -1, -1                ; encoding: [0x7e,0x00,0xa1,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_lt_f64_e64 0.5, null
+// GFX12: v_cmpx_lt_f64_e64 0.5, null             ; encoding: [0x7e,0x00,0xa1,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_lt_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_lt_f64_e64 -|src_scc|, -|exec|   ; encoding: [0x7e,0x03,0xa1,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_lt_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_lt_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa1,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_i16_e64 v1, v2
+// GFX12: v_cmpx_lt_i16_e64 v1, v2                ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lt_i16_e64 v255, v255
+// GFX12: v_cmpx_lt_i16_e64 v255, v255            ; encoding: [0x7e,0x00,0xb1,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_lt_i16_e64 s1, s2
+// GFX12: v_cmpx_lt_i16_e64 s1, s2                ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_lt_i16_e64 s105, s105
+// GFX12: v_cmpx_lt_i16_e64 s105, s105            ; encoding: [0x7e,0x00,0xb1,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_lt_i16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_lt_i16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xb1,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_lt_i16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_lt_i16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xb1,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_i16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_lt_i16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xb1,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_lt_i16_e64 m0, 0.5
+// GFX12: v_cmpx_lt_i16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xb1,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_lt_i16_e64 exec_lo, -1
+// GFX12: v_cmpx_lt_i16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xb1,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_lt_i16_e64 exec_hi, null
+// GFX12: v_cmpx_lt_i16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xb1,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_lt_i16_e64 null, exec_lo
+// GFX12: v_cmpx_lt_i16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xb1,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_lt_i16_e64 -1, exec_hi
+// GFX12: v_cmpx_lt_i16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xb1,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_lt_i16_e64 0.5, m0
+// GFX12: v_cmpx_lt_i16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xb1,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_lt_i16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_lt_i16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xb1,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_lt_i16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_lt_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb1,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_i32_e64 v1, v2
+// GFX12: v_cmpx_lt_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lt_i32_e64 v255, v255
+// GFX12: v_cmpx_lt_i32_e64 v255, v255            ; encoding: [0x7e,0x00,0xc1,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_lt_i32_e64 s1, s2
+// GFX12: v_cmpx_lt_i32_e64 s1, s2                ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_lt_i32_e64 s105, s105
+// GFX12: v_cmpx_lt_i32_e64 s105, s105            ; encoding: [0x7e,0x00,0xc1,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_lt_i32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_lt_i32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xc1,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_lt_i32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_lt_i32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xc1,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_i32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_lt_i32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xc1,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_lt_i32_e64 m0, 0.5
+// GFX12: v_cmpx_lt_i32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xc1,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_lt_i32_e64 exec_lo, -1
+// GFX12: v_cmpx_lt_i32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xc1,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_lt_i32_e64 exec_hi, null
+// GFX12: v_cmpx_lt_i32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xc1,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_lt_i32_e64 null, exec_lo
+// GFX12: v_cmpx_lt_i32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xc1,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_lt_i32_e64 -1, exec_hi
+// GFX12: v_cmpx_lt_i32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xc1,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_lt_i32_e64 0.5, m0
+// GFX12: v_cmpx_lt_i32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xc1,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_lt_i32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_lt_i32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xc1,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_lt_i32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_lt_i32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xc1,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_i64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_lt_i64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xd1,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lt_i64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_lt_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd1,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_lt_i64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_lt_i64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xd1,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_lt_i64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_lt_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd1,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_lt_i64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_lt_i64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xd1,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_lt_i64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_lt_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd1,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_i64_e64 exec, src_scc
+// GFX12: v_cmpx_lt_i64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xd1,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_lt_i64_e64 null, 0.5
+// GFX12: v_cmpx_lt_i64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xd1,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_lt_i64_e64 -1, -1
+// GFX12: v_cmpx_lt_i64_e64 -1, -1                ; encoding: [0x7e,0x00,0xd1,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_lt_i64_e64 0.5, null
+// GFX12: v_cmpx_lt_i64_e64 0.5, null             ; encoding: [0x7e,0x00,0xd1,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_lt_i64_e64 src_scc, exec
+// GFX12: v_cmpx_lt_i64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xd1,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_lt_i64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_lt_i64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xd1,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_u16_e64 v1, v2
+// GFX12: v_cmpx_lt_u16_e64 v1, v2                ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lt_u16_e64 v255, v255
+// GFX12: v_cmpx_lt_u16_e64 v255, v255            ; encoding: [0x7e,0x00,0xb9,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_lt_u16_e64 s1, s2
+// GFX12: v_cmpx_lt_u16_e64 s1, s2                ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_lt_u16_e64 s105, s105
+// GFX12: v_cmpx_lt_u16_e64 s105, s105            ; encoding: [0x7e,0x00,0xb9,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_lt_u16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_lt_u16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xb9,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_lt_u16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_lt_u16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xb9,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_u16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_lt_u16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xb9,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_lt_u16_e64 m0, 0.5
+// GFX12: v_cmpx_lt_u16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xb9,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_lt_u16_e64 exec_lo, -1
+// GFX12: v_cmpx_lt_u16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xb9,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_lt_u16_e64 exec_hi, null
+// GFX12: v_cmpx_lt_u16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xb9,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_lt_u16_e64 null, exec_lo
+// GFX12: v_cmpx_lt_u16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xb9,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_lt_u16_e64 -1, exec_hi
+// GFX12: v_cmpx_lt_u16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xb9,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_lt_u16_e64 0.5, m0
+// GFX12: v_cmpx_lt_u16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xb9,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_lt_u16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_lt_u16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xb9,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_lt_u16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_lt_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb9,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_u32_e64 v1, v2
+// GFX12: v_cmpx_lt_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lt_u32_e64 v255, v255
+// GFX12: v_cmpx_lt_u32_e64 v255, v255            ; encoding: [0x7e,0x00,0xc9,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_lt_u32_e64 s1, s2
+// GFX12: v_cmpx_lt_u32_e64 s1, s2                ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_lt_u32_e64 s105, s105
+// GFX12: v_cmpx_lt_u32_e64 s105, s105            ; encoding: [0x7e,0x00,0xc9,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_lt_u32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_lt_u32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xc9,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_lt_u32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_lt_u32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xc9,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_u32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_lt_u32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xc9,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_lt_u32_e64 m0, 0.5
+// GFX12: v_cmpx_lt_u32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xc9,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_lt_u32_e64 exec_lo, -1
+// GFX12: v_cmpx_lt_u32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xc9,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_lt_u32_e64 exec_hi, null
+// GFX12: v_cmpx_lt_u32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xc9,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_lt_u32_e64 null, exec_lo
+// GFX12: v_cmpx_lt_u32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xc9,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_lt_u32_e64 -1, exec_hi
+// GFX12: v_cmpx_lt_u32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xc9,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_lt_u32_e64 0.5, m0
+// GFX12: v_cmpx_lt_u32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xc9,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_lt_u32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_lt_u32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xc9,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_lt_u32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_lt_u32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xc9,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_u64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_lt_u64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xd9,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_lt_u64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_lt_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd9,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_lt_u64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_lt_u64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xd9,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_lt_u64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_lt_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd9,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_lt_u64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_lt_u64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xd9,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_lt_u64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_lt_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd9,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_u64_e64 exec, src_scc
+// GFX12: v_cmpx_lt_u64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xd9,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_lt_u64_e64 null, 0.5
+// GFX12: v_cmpx_lt_u64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xd9,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_lt_u64_e64 -1, -1
+// GFX12: v_cmpx_lt_u64_e64 -1, -1                ; encoding: [0x7e,0x00,0xd9,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_lt_u64_e64 0.5, null
+// GFX12: v_cmpx_lt_u64_e64 0.5, null             ; encoding: [0x7e,0x00,0xd9,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_lt_u64_e64 src_scc, exec
+// GFX12: v_cmpx_lt_u64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xd9,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_lt_u64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_lt_u64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xd9,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_i16_e64 v1, v2
+// GFX12: v_cmpx_ne_i16_e64 v1, v2                ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ne_i16_e64 v255, v255
+// GFX12: v_cmpx_ne_i16_e64 v255, v255            ; encoding: [0x7e,0x00,0xb5,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ne_i16_e64 s1, s2
+// GFX12: v_cmpx_ne_i16_e64 s1, s2                ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ne_i16_e64 s105, s105
+// GFX12: v_cmpx_ne_i16_e64 s105, s105            ; encoding: [0x7e,0x00,0xb5,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ne_i16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ne_i16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xb5,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ne_i16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_ne_i16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xb5,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ne_i16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ne_i16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xb5,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ne_i16_e64 m0, 0.5
+// GFX12: v_cmpx_ne_i16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xb5,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ne_i16_e64 exec_lo, -1
+// GFX12: v_cmpx_ne_i16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xb5,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ne_i16_e64 exec_hi, null
+// GFX12: v_cmpx_ne_i16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xb5,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ne_i16_e64 null, exec_lo
+// GFX12: v_cmpx_ne_i16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xb5,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ne_i16_e64 -1, exec_hi
+// GFX12: v_cmpx_ne_i16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xb5,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ne_i16_e64 0.5, m0
+// GFX12: v_cmpx_ne_i16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xb5,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_ne_i16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_ne_i16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xb5,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_ne_i16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_ne_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb5,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ne_i32_e64 v1, v2
+// GFX12: v_cmpx_ne_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ne_i32_e64 v255, v255
+// GFX12: v_cmpx_ne_i32_e64 v255, v255            ; encoding: [0x7e,0x00,0xc5,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ne_i32_e64 s1, s2
+// GFX12: v_cmpx_ne_i32_e64 s1, s2                ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ne_i32_e64 s105, s105
+// GFX12: v_cmpx_ne_i32_e64 s105, s105            ; encoding: [0x7e,0x00,0xc5,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ne_i32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ne_i32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xc5,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ne_i32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_ne_i32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xc5,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_i32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ne_i32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xc5,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ne_i32_e64 m0, 0.5
+// GFX12: v_cmpx_ne_i32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xc5,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ne_i32_e64 exec_lo, -1
+// GFX12: v_cmpx_ne_i32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xc5,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ne_i32_e64 exec_hi, null
+// GFX12: v_cmpx_ne_i32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xc5,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ne_i32_e64 null, exec_lo
+// GFX12: v_cmpx_ne_i32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xc5,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ne_i32_e64 -1, exec_hi
+// GFX12: v_cmpx_ne_i32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xc5,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ne_i32_e64 0.5, m0
+// GFX12: v_cmpx_ne_i32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xc5,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_ne_i32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_ne_i32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xc5,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_ne_i32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_ne_i32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xc5,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_i64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_ne_i64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xd5,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ne_i64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_ne_i64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xd5,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_ne_i64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_ne_i64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xd5,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_ne_i64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_ne_i64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xd5,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_ne_i64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_ne_i64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xd5,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_ne_i64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_ne_i64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xd5,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_i64_e64 exec, src_scc
+// GFX12: v_cmpx_ne_i64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xd5,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_ne_i64_e64 null, 0.5
+// GFX12: v_cmpx_ne_i64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xd5,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_ne_i64_e64 -1, -1
+// GFX12: v_cmpx_ne_i64_e64 -1, -1                ; encoding: [0x7e,0x00,0xd5,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_ne_i64_e64 0.5, null
+// GFX12: v_cmpx_ne_i64_e64 0.5, null             ; encoding: [0x7e,0x00,0xd5,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_ne_i64_e64 src_scc, exec
+// GFX12: v_cmpx_ne_i64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xd5,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_ne_i64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_ne_i64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xd5,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_u16_e64 v1, v2
+// GFX12: v_cmpx_ne_u16_e64 v1, v2                ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ne_u16_e64 v255, v255
+// GFX12: v_cmpx_ne_u16_e64 v255, v255            ; encoding: [0x7e,0x00,0xbd,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ne_u16_e64 s1, s2
+// GFX12: v_cmpx_ne_u16_e64 s1, s2                ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ne_u16_e64 s105, s105
+// GFX12: v_cmpx_ne_u16_e64 s105, s105            ; encoding: [0x7e,0x00,0xbd,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ne_u16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ne_u16_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xbd,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ne_u16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_ne_u16_e64 vcc_hi, 0xfe0b        ; encoding: [0x7e,0x00,0xbd,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ne_u16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ne_u16_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xbd,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ne_u16_e64 m0, 0.5
+// GFX12: v_cmpx_ne_u16_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xbd,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ne_u16_e64 exec_lo, -1
+// GFX12: v_cmpx_ne_u16_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xbd,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ne_u16_e64 exec_hi, null
+// GFX12: v_cmpx_ne_u16_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xbd,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ne_u16_e64 null, exec_lo
+// GFX12: v_cmpx_ne_u16_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xbd,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ne_u16_e64 -1, exec_hi
+// GFX12: v_cmpx_ne_u16_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xbd,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ne_u16_e64 0.5, m0
+// GFX12: v_cmpx_ne_u16_e64 0.5, m0               ; encoding: [0x7e,0x00,0xbd,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_ne_u16_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_ne_u16_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xbd,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_ne_u16_e64 0xfe0b, vcc_hi
+// GFX12: v_cmpx_ne_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbd,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ne_u32_e64 v1, v2
+// GFX12: v_cmpx_ne_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ne_u32_e64 v255, v255
+// GFX12: v_cmpx_ne_u32_e64 v255, v255            ; encoding: [0x7e,0x00,0xcd,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ne_u32_e64 s1, s2
+// GFX12: v_cmpx_ne_u32_e64 s1, s2                ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ne_u32_e64 s105, s105
+// GFX12: v_cmpx_ne_u32_e64 s105, s105            ; encoding: [0x7e,0x00,0xcd,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ne_u32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ne_u32_e64 vcc_lo, ttmp15        ; encoding: [0x7e,0x00,0xcd,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ne_u32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_ne_u32_e64 vcc_hi, 0xaf123456    ; encoding: [0x7e,0x00,0xcd,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_u32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ne_u32_e64 ttmp15, src_scc       ; encoding: [0x7e,0x00,0xcd,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ne_u32_e64 m0, 0.5
+// GFX12: v_cmpx_ne_u32_e64 m0, 0.5               ; encoding: [0x7e,0x00,0xcd,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ne_u32_e64 exec_lo, -1
+// GFX12: v_cmpx_ne_u32_e64 exec_lo, -1           ; encoding: [0x7e,0x00,0xcd,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ne_u32_e64 exec_hi, null
+// GFX12: v_cmpx_ne_u32_e64 exec_hi, null         ; encoding: [0x7e,0x00,0xcd,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ne_u32_e64 null, exec_lo
+// GFX12: v_cmpx_ne_u32_e64 null, exec_lo         ; encoding: [0x7e,0x00,0xcd,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ne_u32_e64 -1, exec_hi
+// GFX12: v_cmpx_ne_u32_e64 -1, exec_hi           ; encoding: [0x7e,0x00,0xcd,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ne_u32_e64 0.5, m0
+// GFX12: v_cmpx_ne_u32_e64 0.5, m0               ; encoding: [0x7e,0x00,0xcd,0xd4,0xf0,0xfa,0x00,0x00]
+
+v_cmpx_ne_u32_e64 src_scc, vcc_lo
+// GFX12: v_cmpx_ne_u32_e64 src_scc, vcc_lo       ; encoding: [0x7e,0x00,0xcd,0xd4,0xfd,0xd4,0x00,0x00]
+
+v_cmpx_ne_u32_e64 0xaf123456, vcc_hi
+// GFX12: v_cmpx_ne_u32_e64 0xaf123456, vcc_hi    ; encoding: [0x7e,0x00,0xcd,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_u64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_ne_u64_e64 v[1:2], v[2:3]        ; encoding: [0x7e,0x00,0xdd,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ne_u64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_ne_u64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xdd,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_ne_u64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_ne_u64_e64 s[2:3], s[4:5]        ; encoding: [0x7e,0x00,0xdd,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_ne_u64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_ne_u64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xdd,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_ne_u64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_ne_u64_e64 vcc, ttmp[14:15]      ; encoding: [0x7e,0x00,0xdd,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_ne_u64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_ne_u64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xdd,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_u64_e64 exec, src_scc
+// GFX12: v_cmpx_ne_u64_e64 exec, src_scc         ; encoding: [0x7e,0x00,0xdd,0xd4,0x7e,0xfa,0x01,0x00]
+
+v_cmpx_ne_u64_e64 null, 0.5
+// GFX12: v_cmpx_ne_u64_e64 null, 0.5             ; encoding: [0x7e,0x00,0xdd,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_ne_u64_e64 -1, -1
+// GFX12: v_cmpx_ne_u64_e64 -1, -1                ; encoding: [0x7e,0x00,0xdd,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_ne_u64_e64 0.5, null
+// GFX12: v_cmpx_ne_u64_e64 0.5, null             ; encoding: [0x7e,0x00,0xdd,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_ne_u64_e64 src_scc, exec
+// GFX12: v_cmpx_ne_u64_e64 src_scc, exec         ; encoding: [0x7e,0x00,0xdd,0xd4,0xfd,0xfc,0x00,0x00]
+
+v_cmpx_ne_u64_e64 0xaf123456, vcc
+// GFX12: v_cmpx_ne_u64_e64 0xaf123456, vcc       ; encoding: [0x7e,0x00,0xdd,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_neq_f16_e64 v1, v2
+// GFX12: v_cmpx_neq_f16_e64 v1, v2               ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_neq_f16_e64 v255, v255
+// GFX12: v_cmpx_neq_f16_e64 v255, v255           ; encoding: [0x7e,0x00,0x8d,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_neq_f16_e64 s1, s2
+// GFX12: v_cmpx_neq_f16_e64 s1, s2               ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_neq_f16_e64 s105, s105
+// GFX12: v_cmpx_neq_f16_e64 s105, s105           ; encoding: [0x7e,0x00,0x8d,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_neq_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_neq_f16_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x8d,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_neq_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_neq_f16_e64 vcc_hi, 0xfe0b       ; encoding: [0x7e,0x00,0x8d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_neq_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_neq_f16_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x8d,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_neq_f16_e64 m0, 0.5
+// GFX12: v_cmpx_neq_f16_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x8d,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_neq_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_neq_f16_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x8d,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_neq_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_neq_f16_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x8d,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_neq_f16_e64 null, exec_lo
+// GFX12: v_cmpx_neq_f16_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x8d,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_neq_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_neq_f16_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x8d,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_neq_f16_e64 0.5, -m0
+// GFX12: v_cmpx_neq_f16_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x8d,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_neq_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_neq_f16_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x8d,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_neq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_neq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_neq_f32_e64 v1, v2
+// GFX12: v_cmpx_neq_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_neq_f32_e64 v255, v255
+// GFX12: v_cmpx_neq_f32_e64 v255, v255           ; encoding: [0x7e,0x00,0x9d,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_neq_f32_e64 s1, s2
+// GFX12: v_cmpx_neq_f32_e64 s1, s2               ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_neq_f32_e64 s105, s105
+// GFX12: v_cmpx_neq_f32_e64 s105, s105           ; encoding: [0x7e,0x00,0x9d,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_neq_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_neq_f32_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x9d,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_neq_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_neq_f32_e64 vcc_hi, 0xaf123456   ; encoding: [0x7e,0x00,0x9d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_neq_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_neq_f32_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x9d,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_neq_f32_e64 m0, 0.5
+// GFX12: v_cmpx_neq_f32_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x9d,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_neq_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_neq_f32_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x9d,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_neq_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_neq_f32_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x9d,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_neq_f32_e64 null, exec_lo
+// GFX12: v_cmpx_neq_f32_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x9d,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_neq_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_neq_f32_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x9d,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_neq_f32_e64 0.5, -m0
+// GFX12: v_cmpx_neq_f32_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x9d,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_neq_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_neq_f32_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x9d,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_neq_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_neq_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x9d,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_neq_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_neq_f64_e64 v[1:2], v[2:3]       ; encoding: [0x7e,0x00,0xad,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_neq_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_neq_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xad,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_neq_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_neq_f64_e64 s[2:3], s[4:5]       ; encoding: [0x7e,0x00,0xad,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_neq_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_neq_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xad,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_neq_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_neq_f64_e64 vcc, ttmp[14:15]     ; encoding: [0x7e,0x00,0xad,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_neq_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_neq_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xad,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_neq_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_neq_f64_e64 -|exec|, src_scc     ; encoding: [0x7e,0x01,0xad,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_neq_f64_e64 null, 0.5
+// GFX12: v_cmpx_neq_f64_e64 null, 0.5            ; encoding: [0x7e,0x00,0xad,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_neq_f64_e64 -1, -1
+// GFX12: v_cmpx_neq_f64_e64 -1, -1               ; encoding: [0x7e,0x00,0xad,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_neq_f64_e64 0.5, null
+// GFX12: v_cmpx_neq_f64_e64 0.5, null            ; encoding: [0x7e,0x00,0xad,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_neq_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_neq_f64_e64 -|src_scc|, -|exec|  ; encoding: [0x7e,0x03,0xad,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_neq_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_neq_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xad,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nge_f16_e64 v1, v2
+// GFX12: v_cmpx_nge_f16_e64 v1, v2               ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nge_f16_e64 v255, v255
+// GFX12: v_cmpx_nge_f16_e64 v255, v255           ; encoding: [0x7e,0x00,0x89,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_nge_f16_e64 s1, s2
+// GFX12: v_cmpx_nge_f16_e64 s1, s2               ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_nge_f16_e64 s105, s105
+// GFX12: v_cmpx_nge_f16_e64 s105, s105           ; encoding: [0x7e,0x00,0x89,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_nge_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_nge_f16_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x89,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_nge_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_nge_f16_e64 vcc_hi, 0xfe0b       ; encoding: [0x7e,0x00,0x89,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nge_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_nge_f16_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x89,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_nge_f16_e64 m0, 0.5
+// GFX12: v_cmpx_nge_f16_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x89,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_nge_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_nge_f16_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x89,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_nge_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_nge_f16_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x89,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_nge_f16_e64 null, exec_lo
+// GFX12: v_cmpx_nge_f16_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x89,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_nge_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_nge_f16_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x89,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_nge_f16_e64 0.5, -m0
+// GFX12: v_cmpx_nge_f16_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x89,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_nge_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_nge_f16_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x89,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_nge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_nge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x89,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nge_f32_e64 v1, v2
+// GFX12: v_cmpx_nge_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nge_f32_e64 v255, v255
+// GFX12: v_cmpx_nge_f32_e64 v255, v255           ; encoding: [0x7e,0x00,0x99,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_nge_f32_e64 s1, s2
+// GFX12: v_cmpx_nge_f32_e64 s1, s2               ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_nge_f32_e64 s105, s105
+// GFX12: v_cmpx_nge_f32_e64 s105, s105           ; encoding: [0x7e,0x00,0x99,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_nge_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_nge_f32_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x99,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_nge_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_nge_f32_e64 vcc_hi, 0xaf123456   ; encoding: [0x7e,0x00,0x99,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nge_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_nge_f32_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x99,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_nge_f32_e64 m0, 0.5
+// GFX12: v_cmpx_nge_f32_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x99,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_nge_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_nge_f32_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x99,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_nge_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_nge_f32_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x99,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_nge_f32_e64 null, exec_lo
+// GFX12: v_cmpx_nge_f32_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x99,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_nge_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_nge_f32_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x99,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_nge_f32_e64 0.5, -m0
+// GFX12: v_cmpx_nge_f32_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x99,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_nge_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_nge_f32_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x99,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_nge_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_nge_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x99,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nge_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_nge_f64_e64 v[1:2], v[2:3]       ; encoding: [0x7e,0x00,0xa9,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nge_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_nge_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa9,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_nge_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_nge_f64_e64 s[2:3], s[4:5]       ; encoding: [0x7e,0x00,0xa9,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_nge_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_nge_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa9,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_nge_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_nge_f64_e64 vcc, ttmp[14:15]     ; encoding: [0x7e,0x00,0xa9,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_nge_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_nge_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa9,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nge_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_nge_f64_e64 -|exec|, src_scc     ; encoding: [0x7e,0x01,0xa9,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_nge_f64_e64 null, 0.5
+// GFX12: v_cmpx_nge_f64_e64 null, 0.5            ; encoding: [0x7e,0x00,0xa9,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_nge_f64_e64 -1, -1
+// GFX12: v_cmpx_nge_f64_e64 -1, -1               ; encoding: [0x7e,0x00,0xa9,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_nge_f64_e64 0.5, null
+// GFX12: v_cmpx_nge_f64_e64 0.5, null            ; encoding: [0x7e,0x00,0xa9,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_nge_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_nge_f64_e64 -|src_scc|, -|exec|  ; encoding: [0x7e,0x03,0xa9,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_nge_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_nge_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa9,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ngt_f16_e64 v1, v2
+// GFX12: v_cmpx_ngt_f16_e64 v1, v2               ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ngt_f16_e64 v255, v255
+// GFX12: v_cmpx_ngt_f16_e64 v255, v255           ; encoding: [0x7e,0x00,0x8b,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ngt_f16_e64 s1, s2
+// GFX12: v_cmpx_ngt_f16_e64 s1, s2               ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ngt_f16_e64 s105, s105
+// GFX12: v_cmpx_ngt_f16_e64 s105, s105           ; encoding: [0x7e,0x00,0x8b,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ngt_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ngt_f16_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x8b,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ngt_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_ngt_f16_e64 vcc_hi, 0xfe0b       ; encoding: [0x7e,0x00,0x8b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ngt_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ngt_f16_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x8b,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ngt_f16_e64 m0, 0.5
+// GFX12: v_cmpx_ngt_f16_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x8b,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ngt_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_ngt_f16_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x8b,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ngt_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_ngt_f16_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x8b,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ngt_f16_e64 null, exec_lo
+// GFX12: v_cmpx_ngt_f16_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x8b,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ngt_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_ngt_f16_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x8b,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ngt_f16_e64 0.5, -m0
+// GFX12: v_cmpx_ngt_f16_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x8b,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_ngt_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_ngt_f16_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x8b,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_ngt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_ngt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ngt_f32_e64 v1, v2
+// GFX12: v_cmpx_ngt_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ngt_f32_e64 v255, v255
+// GFX12: v_cmpx_ngt_f32_e64 v255, v255           ; encoding: [0x7e,0x00,0x9b,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_ngt_f32_e64 s1, s2
+// GFX12: v_cmpx_ngt_f32_e64 s1, s2               ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_ngt_f32_e64 s105, s105
+// GFX12: v_cmpx_ngt_f32_e64 s105, s105           ; encoding: [0x7e,0x00,0x9b,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_ngt_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_ngt_f32_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x9b,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_ngt_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_ngt_f32_e64 vcc_hi, 0xaf123456   ; encoding: [0x7e,0x00,0x9b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ngt_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_ngt_f32_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x9b,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_ngt_f32_e64 m0, 0.5
+// GFX12: v_cmpx_ngt_f32_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x9b,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_ngt_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_ngt_f32_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x9b,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_ngt_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_ngt_f32_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x9b,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_ngt_f32_e64 null, exec_lo
+// GFX12: v_cmpx_ngt_f32_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x9b,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_ngt_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_ngt_f32_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x9b,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_ngt_f32_e64 0.5, -m0
+// GFX12: v_cmpx_ngt_f32_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x9b,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_ngt_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_ngt_f32_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x9b,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_ngt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_ngt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x9b,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ngt_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_ngt_f64_e64 v[1:2], v[2:3]       ; encoding: [0x7e,0x00,0xab,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_ngt_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_ngt_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xab,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_ngt_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_ngt_f64_e64 s[2:3], s[4:5]       ; encoding: [0x7e,0x00,0xab,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_ngt_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_ngt_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xab,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_ngt_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_ngt_f64_e64 vcc, ttmp[14:15]     ; encoding: [0x7e,0x00,0xab,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_ngt_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_ngt_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xab,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ngt_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_ngt_f64_e64 -|exec|, src_scc     ; encoding: [0x7e,0x01,0xab,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_ngt_f64_e64 null, 0.5
+// GFX12: v_cmpx_ngt_f64_e64 null, 0.5            ; encoding: [0x7e,0x00,0xab,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_ngt_f64_e64 -1, -1
+// GFX12: v_cmpx_ngt_f64_e64 -1, -1               ; encoding: [0x7e,0x00,0xab,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_ngt_f64_e64 0.5, null
+// GFX12: v_cmpx_ngt_f64_e64 0.5, null            ; encoding: [0x7e,0x00,0xab,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_ngt_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_ngt_f64_e64 -|src_scc|, -|exec|  ; encoding: [0x7e,0x03,0xab,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_ngt_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_ngt_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xab,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nle_f16_e64 v1, v2
+// GFX12: v_cmpx_nle_f16_e64 v1, v2               ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nle_f16_e64 v255, v255
+// GFX12: v_cmpx_nle_f16_e64 v255, v255           ; encoding: [0x7e,0x00,0x8c,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_nle_f16_e64 s1, s2
+// GFX12: v_cmpx_nle_f16_e64 s1, s2               ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_nle_f16_e64 s105, s105
+// GFX12: v_cmpx_nle_f16_e64 s105, s105           ; encoding: [0x7e,0x00,0x8c,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_nle_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_nle_f16_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x8c,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_nle_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_nle_f16_e64 vcc_hi, 0xfe0b       ; encoding: [0x7e,0x00,0x8c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nle_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_nle_f16_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x8c,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_nle_f16_e64 m0, 0.5
+// GFX12: v_cmpx_nle_f16_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x8c,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_nle_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_nle_f16_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x8c,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_nle_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_nle_f16_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x8c,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_nle_f16_e64 null, exec_lo
+// GFX12: v_cmpx_nle_f16_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x8c,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_nle_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_nle_f16_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x8c,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_nle_f16_e64 0.5, -m0
+// GFX12: v_cmpx_nle_f16_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x8c,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_nle_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_nle_f16_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x8c,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_nle_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_nle_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nle_f32_e64 v1, v2
+// GFX12: v_cmpx_nle_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nle_f32_e64 v255, v255
+// GFX12: v_cmpx_nle_f32_e64 v255, v255           ; encoding: [0x7e,0x00,0x9c,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_nle_f32_e64 s1, s2
+// GFX12: v_cmpx_nle_f32_e64 s1, s2               ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_nle_f32_e64 s105, s105
+// GFX12: v_cmpx_nle_f32_e64 s105, s105           ; encoding: [0x7e,0x00,0x9c,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_nle_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_nle_f32_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x9c,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_nle_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_nle_f32_e64 vcc_hi, 0xaf123456   ; encoding: [0x7e,0x00,0x9c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nle_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_nle_f32_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x9c,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_nle_f32_e64 m0, 0.5
+// GFX12: v_cmpx_nle_f32_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x9c,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_nle_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_nle_f32_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x9c,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_nle_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_nle_f32_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x9c,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_nle_f32_e64 null, exec_lo
+// GFX12: v_cmpx_nle_f32_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x9c,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_nle_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_nle_f32_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x9c,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_nle_f32_e64 0.5, -m0
+// GFX12: v_cmpx_nle_f32_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x9c,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_nle_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_nle_f32_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x9c,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_nle_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_nle_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x9c,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nle_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_nle_f64_e64 v[1:2], v[2:3]       ; encoding: [0x7e,0x00,0xac,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nle_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_nle_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xac,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_nle_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_nle_f64_e64 s[2:3], s[4:5]       ; encoding: [0x7e,0x00,0xac,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_nle_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_nle_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xac,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_nle_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_nle_f64_e64 vcc, ttmp[14:15]     ; encoding: [0x7e,0x00,0xac,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_nle_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_nle_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xac,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nle_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_nle_f64_e64 -|exec|, src_scc     ; encoding: [0x7e,0x01,0xac,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_nle_f64_e64 null, 0.5
+// GFX12: v_cmpx_nle_f64_e64 null, 0.5            ; encoding: [0x7e,0x00,0xac,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_nle_f64_e64 -1, -1
+// GFX12: v_cmpx_nle_f64_e64 -1, -1               ; encoding: [0x7e,0x00,0xac,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_nle_f64_e64 0.5, null
+// GFX12: v_cmpx_nle_f64_e64 0.5, null            ; encoding: [0x7e,0x00,0xac,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_nle_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_nle_f64_e64 -|src_scc|, -|exec|  ; encoding: [0x7e,0x03,0xac,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_nle_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_nle_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xac,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlg_f16_e64 v1, v2
+// GFX12: v_cmpx_nlg_f16_e64 v1, v2               ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nlg_f16_e64 v255, v255
+// GFX12: v_cmpx_nlg_f16_e64 v255, v255           ; encoding: [0x7e,0x00,0x8a,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_nlg_f16_e64 s1, s2
+// GFX12: v_cmpx_nlg_f16_e64 s1, s2               ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_nlg_f16_e64 s105, s105
+// GFX12: v_cmpx_nlg_f16_e64 s105, s105           ; encoding: [0x7e,0x00,0x8a,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_nlg_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_nlg_f16_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x8a,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_nlg_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_nlg_f16_e64 vcc_hi, 0xfe0b       ; encoding: [0x7e,0x00,0x8a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nlg_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_nlg_f16_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x8a,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_nlg_f16_e64 m0, 0.5
+// GFX12: v_cmpx_nlg_f16_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x8a,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_nlg_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_nlg_f16_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x8a,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_nlg_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_nlg_f16_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x8a,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_nlg_f16_e64 null, exec_lo
+// GFX12: v_cmpx_nlg_f16_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x8a,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_nlg_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_nlg_f16_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x8a,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_nlg_f16_e64 0.5, -m0
+// GFX12: v_cmpx_nlg_f16_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x8a,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_nlg_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_nlg_f16_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x8a,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_nlg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_nlg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nlg_f32_e64 v1, v2
+// GFX12: v_cmpx_nlg_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nlg_f32_e64 v255, v255
+// GFX12: v_cmpx_nlg_f32_e64 v255, v255           ; encoding: [0x7e,0x00,0x9a,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_nlg_f32_e64 s1, s2
+// GFX12: v_cmpx_nlg_f32_e64 s1, s2               ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_nlg_f32_e64 s105, s105
+// GFX12: v_cmpx_nlg_f32_e64 s105, s105           ; encoding: [0x7e,0x00,0x9a,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_nlg_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_nlg_f32_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x9a,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_nlg_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_nlg_f32_e64 vcc_hi, 0xaf123456   ; encoding: [0x7e,0x00,0x9a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlg_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_nlg_f32_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x9a,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_nlg_f32_e64 m0, 0.5
+// GFX12: v_cmpx_nlg_f32_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x9a,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_nlg_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_nlg_f32_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x9a,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_nlg_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_nlg_f32_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x9a,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_nlg_f32_e64 null, exec_lo
+// GFX12: v_cmpx_nlg_f32_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x9a,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_nlg_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_nlg_f32_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x9a,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_nlg_f32_e64 0.5, -m0
+// GFX12: v_cmpx_nlg_f32_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x9a,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_nlg_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_nlg_f32_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x9a,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_nlg_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_nlg_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x9a,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlg_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_nlg_f64_e64 v[1:2], v[2:3]       ; encoding: [0x7e,0x00,0xaa,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nlg_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_nlg_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xaa,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_nlg_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_nlg_f64_e64 s[2:3], s[4:5]       ; encoding: [0x7e,0x00,0xaa,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_nlg_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_nlg_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xaa,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_nlg_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_nlg_f64_e64 vcc, ttmp[14:15]     ; encoding: [0x7e,0x00,0xaa,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_nlg_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_nlg_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xaa,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlg_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_nlg_f64_e64 -|exec|, src_scc     ; encoding: [0x7e,0x01,0xaa,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_nlg_f64_e64 null, 0.5
+// GFX12: v_cmpx_nlg_f64_e64 null, 0.5            ; encoding: [0x7e,0x00,0xaa,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_nlg_f64_e64 -1, -1
+// GFX12: v_cmpx_nlg_f64_e64 -1, -1               ; encoding: [0x7e,0x00,0xaa,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_nlg_f64_e64 0.5, null
+// GFX12: v_cmpx_nlg_f64_e64 0.5, null            ; encoding: [0x7e,0x00,0xaa,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_nlg_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_nlg_f64_e64 -|src_scc|, -|exec|  ; encoding: [0x7e,0x03,0xaa,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_nlg_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_nlg_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xaa,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlt_f16_e64 v1, v2
+// GFX12: v_cmpx_nlt_f16_e64 v1, v2               ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nlt_f16_e64 v255, v255
+// GFX12: v_cmpx_nlt_f16_e64 v255, v255           ; encoding: [0x7e,0x00,0x8e,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_nlt_f16_e64 s1, s2
+// GFX12: v_cmpx_nlt_f16_e64 s1, s2               ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_nlt_f16_e64 s105, s105
+// GFX12: v_cmpx_nlt_f16_e64 s105, s105           ; encoding: [0x7e,0x00,0x8e,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_nlt_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_nlt_f16_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x8e,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_nlt_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_nlt_f16_e64 vcc_hi, 0xfe0b       ; encoding: [0x7e,0x00,0x8e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nlt_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_nlt_f16_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x8e,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_nlt_f16_e64 m0, 0.5
+// GFX12: v_cmpx_nlt_f16_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x8e,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_nlt_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_nlt_f16_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x8e,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_nlt_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_nlt_f16_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x8e,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_nlt_f16_e64 null, exec_lo
+// GFX12: v_cmpx_nlt_f16_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x8e,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_nlt_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_nlt_f16_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x8e,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_nlt_f16_e64 0.5, -m0
+// GFX12: v_cmpx_nlt_f16_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x8e,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_nlt_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_nlt_f16_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x8e,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_nlt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_nlt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nlt_f32_e64 v1, v2
+// GFX12: v_cmpx_nlt_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nlt_f32_e64 v255, v255
+// GFX12: v_cmpx_nlt_f32_e64 v255, v255           ; encoding: [0x7e,0x00,0x9e,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_nlt_f32_e64 s1, s2
+// GFX12: v_cmpx_nlt_f32_e64 s1, s2               ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_nlt_f32_e64 s105, s105
+// GFX12: v_cmpx_nlt_f32_e64 s105, s105           ; encoding: [0x7e,0x00,0x9e,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_nlt_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_nlt_f32_e64 vcc_lo, ttmp15       ; encoding: [0x7e,0x00,0x9e,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_nlt_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_nlt_f32_e64 vcc_hi, 0xaf123456   ; encoding: [0x7e,0x00,0x9e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlt_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_nlt_f32_e64 ttmp15, src_scc      ; encoding: [0x7e,0x00,0x9e,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_nlt_f32_e64 m0, 0.5
+// GFX12: v_cmpx_nlt_f32_e64 m0, 0.5              ; encoding: [0x7e,0x00,0x9e,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_nlt_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_nlt_f32_e64 exec_lo, -1          ; encoding: [0x7e,0x00,0x9e,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_nlt_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_nlt_f32_e64 |exec_hi|, null      ; encoding: [0x7e,0x01,0x9e,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_nlt_f32_e64 null, exec_lo
+// GFX12: v_cmpx_nlt_f32_e64 null, exec_lo        ; encoding: [0x7e,0x00,0x9e,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_nlt_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_nlt_f32_e64 -1, exec_hi          ; encoding: [0x7e,0x00,0x9e,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_nlt_f32_e64 0.5, -m0
+// GFX12: v_cmpx_nlt_f32_e64 0.5, -m0             ; encoding: [0x7e,0x00,0x9e,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_nlt_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_nlt_f32_e64 -src_scc, |vcc_lo|   ; encoding: [0x7e,0x02,0x9e,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_nlt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_nlt_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x9e,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlt_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_nlt_f64_e64 v[1:2], v[2:3]       ; encoding: [0x7e,0x00,0xae,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_nlt_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_nlt_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xae,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_nlt_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_nlt_f64_e64 s[2:3], s[4:5]       ; encoding: [0x7e,0x00,0xae,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_nlt_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_nlt_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xae,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_nlt_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_nlt_f64_e64 vcc, ttmp[14:15]     ; encoding: [0x7e,0x00,0xae,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_nlt_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_nlt_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xae,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlt_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_nlt_f64_e64 -|exec|, src_scc     ; encoding: [0x7e,0x01,0xae,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_nlt_f64_e64 null, 0.5
+// GFX12: v_cmpx_nlt_f64_e64 null, 0.5            ; encoding: [0x7e,0x00,0xae,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_nlt_f64_e64 -1, -1
+// GFX12: v_cmpx_nlt_f64_e64 -1, -1               ; encoding: [0x7e,0x00,0xae,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_nlt_f64_e64 0.5, null
+// GFX12: v_cmpx_nlt_f64_e64 0.5, null            ; encoding: [0x7e,0x00,0xae,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_nlt_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_nlt_f64_e64 -|src_scc|, -|exec|  ; encoding: [0x7e,0x03,0xae,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_nlt_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_nlt_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xae,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_o_f16_e64 v1, v2
+// GFX12: v_cmpx_o_f16_e64 v1, v2                 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_o_f16_e64 v255, v255
+// GFX12: v_cmpx_o_f16_e64 v255, v255             ; encoding: [0x7e,0x00,0x87,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_o_f16_e64 s1, s2
+// GFX12: v_cmpx_o_f16_e64 s1, s2                 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_o_f16_e64 s105, s105
+// GFX12: v_cmpx_o_f16_e64 s105, s105             ; encoding: [0x7e,0x00,0x87,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_o_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_o_f16_e64 vcc_lo, ttmp15         ; encoding: [0x7e,0x00,0x87,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_o_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_o_f16_e64 vcc_hi, 0xfe0b         ; encoding: [0x7e,0x00,0x87,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_o_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_o_f16_e64 ttmp15, src_scc        ; encoding: [0x7e,0x00,0x87,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_o_f16_e64 m0, 0.5
+// GFX12: v_cmpx_o_f16_e64 m0, 0.5                ; encoding: [0x7e,0x00,0x87,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_o_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_o_f16_e64 exec_lo, -1            ; encoding: [0x7e,0x00,0x87,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_o_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_o_f16_e64 |exec_hi|, null        ; encoding: [0x7e,0x01,0x87,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_o_f16_e64 null, exec_lo
+// GFX12: v_cmpx_o_f16_e64 null, exec_lo          ; encoding: [0x7e,0x00,0x87,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_o_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_o_f16_e64 -1, exec_hi            ; encoding: [0x7e,0x00,0x87,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_o_f16_e64 0.5, -m0
+// GFX12: v_cmpx_o_f16_e64 0.5, -m0               ; encoding: [0x7e,0x00,0x87,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_o_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_o_f16_e64 -src_scc, |vcc_lo|     ; encoding: [0x7e,0x02,0x87,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_o_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_o_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x87,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_o_f32_e64 v1, v2
+// GFX12: v_cmpx_o_f32_e64 v1, v2                 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_o_f32_e64 v255, v255
+// GFX12: v_cmpx_o_f32_e64 v255, v255             ; encoding: [0x7e,0x00,0x97,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_o_f32_e64 s1, s2
+// GFX12: v_cmpx_o_f32_e64 s1, s2                 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_o_f32_e64 s105, s105
+// GFX12: v_cmpx_o_f32_e64 s105, s105             ; encoding: [0x7e,0x00,0x97,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_o_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_o_f32_e64 vcc_lo, ttmp15         ; encoding: [0x7e,0x00,0x97,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_o_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_o_f32_e64 vcc_hi, 0xaf123456     ; encoding: [0x7e,0x00,0x97,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_o_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_o_f32_e64 ttmp15, src_scc        ; encoding: [0x7e,0x00,0x97,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_o_f32_e64 m0, 0.5
+// GFX12: v_cmpx_o_f32_e64 m0, 0.5                ; encoding: [0x7e,0x00,0x97,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_o_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_o_f32_e64 exec_lo, -1            ; encoding: [0x7e,0x00,0x97,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_o_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_o_f32_e64 |exec_hi|, null        ; encoding: [0x7e,0x01,0x97,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_o_f32_e64 null, exec_lo
+// GFX12: v_cmpx_o_f32_e64 null, exec_lo          ; encoding: [0x7e,0x00,0x97,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_o_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_o_f32_e64 -1, exec_hi            ; encoding: [0x7e,0x00,0x97,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_o_f32_e64 0.5, -m0
+// GFX12: v_cmpx_o_f32_e64 0.5, -m0               ; encoding: [0x7e,0x00,0x97,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_o_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_o_f32_e64 -src_scc, |vcc_lo|     ; encoding: [0x7e,0x02,0x97,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_o_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_o_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x97,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_o_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_o_f64_e64 v[1:2], v[2:3]         ; encoding: [0x7e,0x00,0xa7,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_o_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_o_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa7,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_o_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_o_f64_e64 s[2:3], s[4:5]         ; encoding: [0x7e,0x00,0xa7,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_o_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_o_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa7,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_o_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_o_f64_e64 vcc, ttmp[14:15]       ; encoding: [0x7e,0x00,0xa7,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_o_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_o_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa7,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_o_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_o_f64_e64 -|exec|, src_scc       ; encoding: [0x7e,0x01,0xa7,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_o_f64_e64 null, 0.5
+// GFX12: v_cmpx_o_f64_e64 null, 0.5              ; encoding: [0x7e,0x00,0xa7,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_o_f64_e64 -1, -1
+// GFX12: v_cmpx_o_f64_e64 -1, -1                 ; encoding: [0x7e,0x00,0xa7,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_o_f64_e64 0.5, null
+// GFX12: v_cmpx_o_f64_e64 0.5, null              ; encoding: [0x7e,0x00,0xa7,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_o_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_o_f64_e64 -|src_scc|, -|exec|    ; encoding: [0x7e,0x03,0xa7,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_o_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_o_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa7,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmpx_u_f16_e64 v1, v2
+// GFX12: v_cmpx_u_f16_e64 v1, v2                 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_u_f16_e64 v255, v255
+// GFX12: v_cmpx_u_f16_e64 v255, v255             ; encoding: [0x7e,0x00,0x88,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_u_f16_e64 s1, s2
+// GFX12: v_cmpx_u_f16_e64 s1, s2                 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_u_f16_e64 s105, s105
+// GFX12: v_cmpx_u_f16_e64 s105, s105             ; encoding: [0x7e,0x00,0x88,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_u_f16_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_u_f16_e64 vcc_lo, ttmp15         ; encoding: [0x7e,0x00,0x88,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_u_f16_e64 vcc_hi, 0xfe0b
+// GFX12: v_cmpx_u_f16_e64 vcc_hi, 0xfe0b         ; encoding: [0x7e,0x00,0x88,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_u_f16_e64 ttmp15, src_scc
+// GFX12: v_cmpx_u_f16_e64 ttmp15, src_scc        ; encoding: [0x7e,0x00,0x88,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_u_f16_e64 m0, 0.5
+// GFX12: v_cmpx_u_f16_e64 m0, 0.5                ; encoding: [0x7e,0x00,0x88,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_u_f16_e64 exec_lo, -1
+// GFX12: v_cmpx_u_f16_e64 exec_lo, -1            ; encoding: [0x7e,0x00,0x88,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_u_f16_e64 |exec_hi|, null
+// GFX12: v_cmpx_u_f16_e64 |exec_hi|, null        ; encoding: [0x7e,0x01,0x88,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_u_f16_e64 null, exec_lo
+// GFX12: v_cmpx_u_f16_e64 null, exec_lo          ; encoding: [0x7e,0x00,0x88,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_u_f16_e64 -1, exec_hi
+// GFX12: v_cmpx_u_f16_e64 -1, exec_hi            ; encoding: [0x7e,0x00,0x88,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_u_f16_e64 0.5, -m0
+// GFX12: v_cmpx_u_f16_e64 0.5, -m0               ; encoding: [0x7e,0x00,0x88,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_u_f16_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_u_f16_e64 -src_scc, |vcc_lo|     ; encoding: [0x7e,0x02,0x88,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_u_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: v_cmpx_u_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x88,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_u_f32_e64 v1, v2
+// GFX12: v_cmpx_u_f32_e64 v1, v2                 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_u_f32_e64 v255, v255
+// GFX12: v_cmpx_u_f32_e64 v255, v255             ; encoding: [0x7e,0x00,0x98,0xd4,0xff,0xff,0x03,0x00]
+
+v_cmpx_u_f32_e64 s1, s2
+// GFX12: v_cmpx_u_f32_e64 s1, s2                 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x04,0x00,0x00]
+
+v_cmpx_u_f32_e64 s105, s105
+// GFX12: v_cmpx_u_f32_e64 s105, s105             ; encoding: [0x7e,0x00,0x98,0xd4,0x69,0xd2,0x00,0x00]
+
+v_cmpx_u_f32_e64 vcc_lo, ttmp15
+// GFX12: v_cmpx_u_f32_e64 vcc_lo, ttmp15         ; encoding: [0x7e,0x00,0x98,0xd4,0x6a,0xf6,0x00,0x00]
+
+v_cmpx_u_f32_e64 vcc_hi, 0xaf123456
+// GFX12: v_cmpx_u_f32_e64 vcc_hi, 0xaf123456     ; encoding: [0x7e,0x00,0x98,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_u_f32_e64 ttmp15, src_scc
+// GFX12: v_cmpx_u_f32_e64 ttmp15, src_scc        ; encoding: [0x7e,0x00,0x98,0xd4,0x7b,0xfa,0x01,0x00]
+
+v_cmpx_u_f32_e64 m0, 0.5
+// GFX12: v_cmpx_u_f32_e64 m0, 0.5                ; encoding: [0x7e,0x00,0x98,0xd4,0x7d,0xe0,0x01,0x00]
+
+v_cmpx_u_f32_e64 exec_lo, -1
+// GFX12: v_cmpx_u_f32_e64 exec_lo, -1            ; encoding: [0x7e,0x00,0x98,0xd4,0x7e,0x82,0x01,0x00]
+
+v_cmpx_u_f32_e64 |exec_hi|, null
+// GFX12: v_cmpx_u_f32_e64 |exec_hi|, null        ; encoding: [0x7e,0x01,0x98,0xd4,0x7f,0xf8,0x00,0x00]
+
+v_cmpx_u_f32_e64 null, exec_lo
+// GFX12: v_cmpx_u_f32_e64 null, exec_lo          ; encoding: [0x7e,0x00,0x98,0xd4,0x7c,0xfc,0x00,0x00]
+
+v_cmpx_u_f32_e64 -1, exec_hi
+// GFX12: v_cmpx_u_f32_e64 -1, exec_hi            ; encoding: [0x7e,0x00,0x98,0xd4,0xc1,0xfe,0x00,0x00]
+
+v_cmpx_u_f32_e64 0.5, -m0
+// GFX12: v_cmpx_u_f32_e64 0.5, -m0               ; encoding: [0x7e,0x00,0x98,0xd4,0xf0,0xfa,0x00,0x40]
+
+v_cmpx_u_f32_e64 -src_scc, |vcc_lo|
+// GFX12: v_cmpx_u_f32_e64 -src_scc, |vcc_lo|     ; encoding: [0x7e,0x02,0x98,0xd4,0xfd,0xd4,0x00,0x20]
+
+v_cmpx_u_f32_e64 -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: v_cmpx_u_f32_e64 -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x98,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmpx_u_f64_e64 v[1:2], v[2:3]
+// GFX12: v_cmpx_u_f64_e64 v[1:2], v[2:3]         ; encoding: [0x7e,0x00,0xa8,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_u_f64_e64 v[254:255], v[254:255]
+// GFX12: v_cmpx_u_f64_e64 v[254:255], v[254:255] ; encoding: [0x7e,0x00,0xa8,0xd4,0xfe,0xfd,0x03,0x00]
+
+v_cmpx_u_f64_e64 s[2:3], s[4:5]
+// GFX12: v_cmpx_u_f64_e64 s[2:3], s[4:5]         ; encoding: [0x7e,0x00,0xa8,0xd4,0x02,0x08,0x00,0x00]
+
+v_cmpx_u_f64_e64 s[104:105], s[104:105]
+// GFX12: v_cmpx_u_f64_e64 s[104:105], s[104:105] ; encoding: [0x7e,0x00,0xa8,0xd4,0x68,0xd0,0x00,0x00]
+
+v_cmpx_u_f64_e64 vcc, ttmp[14:15]
+// GFX12: v_cmpx_u_f64_e64 vcc, ttmp[14:15]       ; encoding: [0x7e,0x00,0xa8,0xd4,0x6a,0xf4,0x00,0x00]
+
+v_cmpx_u_f64_e64 ttmp[14:15], 0xaf123456
+// GFX12: v_cmpx_u_f64_e64 ttmp[14:15], 0xaf123456 ; encoding: [0x7e,0x00,0xa8,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmpx_u_f64_e64 -|exec|, src_scc
+// GFX12: v_cmpx_u_f64_e64 -|exec|, src_scc       ; encoding: [0x7e,0x01,0xa8,0xd4,0x7e,0xfa,0x01,0x20]
+
+v_cmpx_u_f64_e64 null, 0.5
+// GFX12: v_cmpx_u_f64_e64 null, 0.5              ; encoding: [0x7e,0x00,0xa8,0xd4,0x7c,0xe0,0x01,0x00]
+
+v_cmpx_u_f64_e64 -1, -1
+// GFX12: v_cmpx_u_f64_e64 -1, -1                 ; encoding: [0x7e,0x00,0xa8,0xd4,0xc1,0x82,0x01,0x00]
+
+v_cmpx_u_f64_e64 0.5, null
+// GFX12: v_cmpx_u_f64_e64 0.5, null              ; encoding: [0x7e,0x00,0xa8,0xd4,0xf0,0xf8,0x00,0x00]
+
+v_cmpx_u_f64_e64 -|src_scc|, -|exec|
+// GFX12: v_cmpx_u_f64_e64 -|src_scc|, -|exec|    ; encoding: [0x7e,0x03,0xa8,0xd4,0xfd,0xfc,0x00,0x60]
+
+v_cmpx_u_f64_e64 0xaf123456, -|vcc| clamp
+// GFX12: v_cmpx_u_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa8,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s
index 5772fb30391f8..476ea846f603a 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
 
 v_cmpx_class_f16_e64 v1, v2
 // GFX12: v_cmpx_class_f16_e64 v1, v2             ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16-fake16.s
new file mode 100644
index 0000000000000..8a586de63b508
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16-fake16.s
@@ -0,0 +1,2595 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+
+v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_class_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_class_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_class_f16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_class_f16_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_class_f16_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x01,0xfd,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+
+v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_class_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_class_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_class_f32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_class_f32_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_class_f32_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x01,0xfe,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_eq_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x82,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_eq_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x82,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_eq_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x82,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_eq_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x92,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_eq_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x92,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x92,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_eq_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_eq_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_eq_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_eq_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x86,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x86,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ge_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x96,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ge_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x96,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x96,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ge_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ge_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ge_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ge_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_gt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x84,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_gt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x84,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_gt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x84,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_gt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x94,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_gt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x94,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_gt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x94,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_gt_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_gt_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_gt_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_gt_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_le_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_le_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x83,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_le_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x83,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_le_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x83,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_le_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_le_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x93,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_le_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x93,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_le_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x93,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_le_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_le_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_le_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_le_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_le_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_le_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_le_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_le_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_lg_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_lg_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x85,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lg_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_lg_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x85,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_lg_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_lg_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x85,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_lg_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_lg_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x95,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_lg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x95,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x95,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_lt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x91,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_lt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x91,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_lt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x91,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_lt_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_lt_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_lt_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_lt_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ne_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ne_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ne_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ne_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_neq_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_neq_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x8d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_neq_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_neq_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x8d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_neq_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_neq_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x8d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_neq_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_neq_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x9d,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_neq_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_neq_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x9d,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_neq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_neq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x9d,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_nge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_nge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x89,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_nge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x89,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_nge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_nge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x89,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_nge_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_nge_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x99,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nge_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_nge_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x99,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_nge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_nge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x99,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ngt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ngt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x8b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ngt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ngt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x8b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_ngt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ngt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x8b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_ngt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_ngt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x9b,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ngt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_ngt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x9b,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_ngt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_ngt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x9b,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_nle_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_nle_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x8c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nle_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_nle_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x8c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_nle_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_nle_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x8c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_nle_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_nle_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x9c,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nle_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_nle_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x9c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_nle_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_nle_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x9c,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlg_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_nlg_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x8a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlg_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_nlg_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x8a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlg_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_nlg_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x8a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlg_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_nlg_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x9a,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_nlg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x9a,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_nlg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x9a,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_nlt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x8e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_nlt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x8e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_nlt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x8e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_nlt_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x9e,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_nlt_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x9e,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_nlt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x9e,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_o_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_o_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_o_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_o_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_o_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_o_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x87,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_o_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_o_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x87,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_o_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_o_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x87,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_o_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_o_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_o_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_o_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_o_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_o_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x97,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_o_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_o_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x97,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_o_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_o_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x97,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_u_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_u_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_u_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_u_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_u_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_u_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x88,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_u_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_u_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x88,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_u_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_u_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x88,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_u_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_u_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_u_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3]
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, v2 row_mirror
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, v2 row_half_mirror
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, v2 row_shl:1
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, v2 row_shl:15
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, v2 row_shr:1
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, v2 row_shr:15
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, v2 row_ror:1
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, v2 row_ror:15
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cmpx_u_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cmpx_u_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_cmpx_u_f32_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x98,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_u_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cmpx_u_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x98,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_u_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cmpx_u_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x98,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s
index 57f89d3e31e7c..a6953ecc1d78a 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
 
 v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8-fake16.s
new file mode 100644
index 0000000000000..0b2383456b6e3
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8-fake16.s
@@ -0,0 +1,897 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+
+v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_class_f16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_class_f16_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xea,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_class_f16_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x01,0xfd,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfe,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_class_f32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfe,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f32_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_class_f32_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfe,0xd4,0xea,0xe8,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f32_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_class_f32_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x01,0xfe,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x82,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x82,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x82,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x82,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_eq_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x82,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x92,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x92,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x92,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x92,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x92,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x92,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_eq_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc2,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_eq_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc2,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_eq_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xca,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_eq_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_eq_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xca,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x86,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x86,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x96,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x96,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x96,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x96,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x96,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x96,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ge_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc6,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ge_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc6,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ge_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xce,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ge_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ge_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xce,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x84,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x84,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x84,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x84,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_gt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x84,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x94,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x94,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x94,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x94,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x94,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_gt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x94,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_gt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc4,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_gt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc4,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbc,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_gt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcc,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_gt_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_gt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xcc,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x83,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x83,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x83,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x83,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_le_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x83,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x93,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x93,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x93,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x93,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x93,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_le_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x93,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_le_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc3,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_le_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc3,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_le_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcb,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_le_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_le_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xcb,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_lg_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x85,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lg_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x85,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lg_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x85,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lg_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x85,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_lg_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x85,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_lg_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x95,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x95,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x95,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lg_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x95,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x95,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x95,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x81,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x91,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x91,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x91,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x91,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x91,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_lt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x91,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_lt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc1,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_lt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc1,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_lt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc9,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_lt_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_lt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc9,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ne_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc5,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ne_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xc5,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ne_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ne_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ne_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x00,0xcd,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cmpx_neq_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_neq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_neq_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_neq_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8d,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_neq_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8d,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_neq_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x8d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_neq_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_neq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_neq_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_neq_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9d,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_neq_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9d,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_neq_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x9d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_nge_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x89,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nge_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x89,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nge_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x89,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nge_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x89,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_nge_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x89,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_nge_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x99,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x99,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nge_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x99,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nge_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x99,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nge_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x99,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_nge_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x99,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_ngt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ngt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ngt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ngt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8b,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ngt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8b,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ngt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x8b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_ngt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ngt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ngt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ngt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9b,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_ngt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9b,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_ngt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x9b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_nle_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nle_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nle_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nle_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8c,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nle_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8c,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_nle_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x8c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_nle_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nle_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nle_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nle_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9c,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nle_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9c,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_nle_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x9c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_nlg_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlg_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlg_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8a,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlg_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8a,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_nlg_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x8a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_nlg_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlg_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9a,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9a,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_nlg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x9a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_nlt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8e,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x8e,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_nlt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x8e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_nlt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9e,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_nlt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x9e,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_nlt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x9e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_o_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_o_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x87,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_o_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x87,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_o_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x87,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_o_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x87,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_o_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x87,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_o_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_o_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x97,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_o_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x97,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_o_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x97,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_o_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x97,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_o_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x97,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_o_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x97,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_u_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_u_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x88,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_u_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x88,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_u_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x88,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_u_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x88,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_u_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x88,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cmpx_u_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_u_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x98,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_u_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x98,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_u_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x98,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_u_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x98,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cmpx_u_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x98,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cmpx_u_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x98,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s
index d69a17e76d555..8e2899086f2bd 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
 
 v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]

From 222ff186087f0f1d9976ac2512dca52d90e13472 Mon Sep 17 00:00:00 2001
From: Brox Chen <guochen2@amd.com>
Date: Thu, 9 Jan 2025 13:40:13 -0500
Subject: [PATCH 28/79] [AMDGPU][True16][CodeGen] Update codegen pattern for
 v_med3_f16 (#121992)

true16 codegen pattern for v_med3_f16
---
 llvm/lib/Target/AMDGPU/SIInstructions.td |   2 +
 llvm/test/CodeGen/AMDGPU/fmed3.ll        | 180 ++++++++++++++++++-----
 2 files changed, 147 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index ee83dff227a85..4325ab448e581 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3670,6 +3670,8 @@ defm : FPMed3Pat<f32, V_MED3_F32_e64>;
 let SubtargetPredicate = HasMed3_16 in {
 let True16Predicate = NotHasTrue16BitInsts in
 defm : FPMed3Pat<f16, V_MED3_F16_e64>;
+let True16Predicate = UseRealTrue16Insts in
+defm : FPMed3Pat<f16, V_MED3_F16_t16_e64>;
 let True16Predicate = UseFakeTrue16Insts in
 defm : FPMed3Pat<f16, V_MED3_F16_fake16_e64>;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 1cdcf276c526b..f490ecf68d984 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -5,8 +5,10 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
 
 define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
 ; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
@@ -7531,19 +7533,61 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %o
 ; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_u16 v1, v0, s[2:3]
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX11-NEXT:    v_med3_f16 v1, v1, 2.0, 4.0
-; GFX11-NEXT:    global_store_b16 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX11-SDAG-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_f16 v1, v1, 2.0, 4.0
+; GFX11-SDAG-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX11-SDAG-FAKE16-NEXT:    s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_f16 v1, v1, 2.0, 4.0
+; GFX11-GISEL-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
+;
+; GFX11-SDAG-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    global_load_u16 v0, v1, s[2:3]
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_f16 v0.l, v0.l, 2.0, 4.0
+; GFX11-SDAG-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
+; GFX11-SDAG-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v0, v1, s[2:3]
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_f16 v0.l, v0.l, 2.0, 4.0
+; GFX11-GISEL-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
+; GFX11-GISEL-TRUE16-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
   %outgep = getelementptr half, ptr addrspace(1) %out, i32 %tid
@@ -7723,26 +7767,92 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, pt
 ; GFX9-NEXT:    global_store_short v0, v1, s[8:9]
 ; GFX9-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_nnan_inputs_med3_f16_pat0:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_u16 v3, v0, s[6:7] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX11-NEXT:    v_add_f16_e32 v2, 2.0, v2
-; GFX11-NEXT:    v_add_f16_e32 v3, 4.0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_med3_f16 v1, v1, v2, v3
-; GFX11-NEXT:    global_store_b16 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX11-SDAG-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    global_load_u16 v3, v0, s[6:7] glc dlc
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e32 v2, 2.0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_add_f16_e32 v3, 4.0, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_f16 v1, v1, v2, v3
+; GFX11-SDAG-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX11-SDAG-FAKE16-NEXT:    s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v3, v0, s[6:7] glc dlc
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
+; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v2, 2.0, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v3, 4.0, v3
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_f16 v1, v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
+;
+; GFX11-SDAG-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    global_load_u16 v0, v2, s[2:3] glc dlc
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    global_load_u16 v1, v2, s[4:5] glc dlc
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    global_load_u16 v3, v2, s[6:7] glc dlc
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e32 v0.h, 2.0, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_add_f16_e32 v1.l, 4.0, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_f16 v0.l, v0.l, v0.h, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    global_store_b16 v2, v0, s[0:1]
+; GFX11-SDAG-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v0, v2, s[2:3] glc dlc
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v1, v2, s[4:5] glc dlc
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v3, v2, s[6:7] glc dlc
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e32 v0.h, 2.0, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    v_add_f16_e32 v1.l, 4.0, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_f16 v0.l, v0.l, v0.h, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    global_store_b16 v2, v0, s[0:1]
+; GFX11-GISEL-TRUE16-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
   %gep1 = getelementptr half, ptr addrspace(1) %bptr, i32 %tid

From 031f33cca3c953dd09ac439fdb503fb3cb36af5e Mon Sep 17 00:00:00 2001
From: Philip Reames <preames@rivosinc.com>
Date: Thu, 9 Jan 2025 10:41:21 -0800
Subject: [PATCH 29/79] [RISCV] Add tests for legalization of <N x i128> and <N
 x i256> shuffles

---
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll   | 234 ++++++++++++++++++
 .../rvv/fixed-vectors-shuffle-exact-vlen.ll   | 180 ++++++++++++++
 2 files changed, 414 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 0bd8466669dc8..8915603471ec7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -1141,3 +1141,237 @@ define <16 x i32> @shuffle_disjoint_lanes_one_splat(i32 %v, <16 x i32> %w) {
   %out = shufflevector <16 x i32> %splat, <16 x i32> %w, <16 x i32> <i32 11, i32 15, i32 7, i32 3, i32 26, i32 30, i32 22, i32 18, i32 9, i32 13, i32 5, i32 1, i32 24, i32 28, i32 20, i32 16>
   ret <16 x i32> %out
 }
+
+define <4 x i128> @shuffle_i128(<4 x i128> %a) {
+; RV32-LABEL: shuffle_i128:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a2, 0(a1)
+; RV32-NEXT:    lw a3, 4(a1)
+; RV32-NEXT:    lw a4, 8(a1)
+; RV32-NEXT:    lw a5, 12(a1)
+; RV32-NEXT:    lw a6, 48(a1)
+; RV32-NEXT:    lw a7, 52(a1)
+; RV32-NEXT:    lw t0, 56(a1)
+; RV32-NEXT:    lw t1, 60(a1)
+; RV32-NEXT:    lw t2, 32(a1)
+; RV32-NEXT:    lw t3, 36(a1)
+; RV32-NEXT:    lw t4, 40(a1)
+; RV32-NEXT:    lw a1, 44(a1)
+; RV32-NEXT:    sw t2, 48(a0)
+; RV32-NEXT:    sw t3, 52(a0)
+; RV32-NEXT:    sw t4, 56(a0)
+; RV32-NEXT:    sw a1, 60(a0)
+; RV32-NEXT:    sw a6, 32(a0)
+; RV32-NEXT:    sw a7, 36(a0)
+; RV32-NEXT:    sw t0, 40(a0)
+; RV32-NEXT:    sw t1, 44(a0)
+; RV32-NEXT:    sw a2, 16(a0)
+; RV32-NEXT:    sw a3, 20(a0)
+; RV32-NEXT:    sw a4, 24(a0)
+; RV32-NEXT:    sw a5, 28(a0)
+; RV32-NEXT:    sw a2, 0(a0)
+; RV32-NEXT:    sw a3, 4(a0)
+; RV32-NEXT:    sw a4, 8(a0)
+; RV32-NEXT:    sw a5, 12(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: shuffle_i128:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ld a2, 48(a1)
+; RV64-NEXT:    ld a3, 56(a1)
+; RV64-NEXT:    ld a4, 0(a1)
+; RV64-NEXT:    ld a5, 8(a1)
+; RV64-NEXT:    ld a6, 32(a1)
+; RV64-NEXT:    ld a1, 40(a1)
+; RV64-NEXT:    sd a2, 32(a0)
+; RV64-NEXT:    sd a3, 40(a0)
+; RV64-NEXT:    sd a6, 48(a0)
+; RV64-NEXT:    sd a1, 56(a0)
+; RV64-NEXT:    sd a4, 0(a0)
+; RV64-NEXT:    sd a5, 8(a0)
+; RV64-NEXT:    sd a4, 16(a0)
+; RV64-NEXT:    sd a5, 24(a0)
+; RV64-NEXT:    ret
+  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
+  ret <4 x i128> %res
+}
+
+define void @shuffle_i128_ldst(ptr %p) {
+; RV32-LABEL: shuffle_i128_ldst:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a1, 48(a0)
+; RV32-NEXT:    lw a2, 52(a0)
+; RV32-NEXT:    lw a3, 56(a0)
+; RV32-NEXT:    lw a4, 60(a0)
+; RV32-NEXT:    lw a5, 0(a0)
+; RV32-NEXT:    lw a6, 4(a0)
+; RV32-NEXT:    lw a7, 8(a0)
+; RV32-NEXT:    lw t0, 12(a0)
+; RV32-NEXT:    lw t1, 32(a0)
+; RV32-NEXT:    lw t2, 36(a0)
+; RV32-NEXT:    lw t3, 40(a0)
+; RV32-NEXT:    lw t4, 44(a0)
+; RV32-NEXT:    sw t1, 48(a0)
+; RV32-NEXT:    sw t2, 52(a0)
+; RV32-NEXT:    sw t3, 56(a0)
+; RV32-NEXT:    sw t4, 60(a0)
+; RV32-NEXT:    sw a5, 16(a0)
+; RV32-NEXT:    sw a6, 20(a0)
+; RV32-NEXT:    sw a7, 24(a0)
+; RV32-NEXT:    sw t0, 28(a0)
+; RV32-NEXT:    sw a1, 32(a0)
+; RV32-NEXT:    sw a2, 36(a0)
+; RV32-NEXT:    sw a3, 40(a0)
+; RV32-NEXT:    sw a4, 44(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: shuffle_i128_ldst:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ld a1, 0(a0)
+; RV64-NEXT:    ld a2, 8(a0)
+; RV64-NEXT:    ld a3, 32(a0)
+; RV64-NEXT:    ld a4, 40(a0)
+; RV64-NEXT:    ld a5, 48(a0)
+; RV64-NEXT:    ld a6, 56(a0)
+; RV64-NEXT:    sd a3, 48(a0)
+; RV64-NEXT:    sd a4, 56(a0)
+; RV64-NEXT:    sd a1, 16(a0)
+; RV64-NEXT:    sd a2, 24(a0)
+; RV64-NEXT:    sd a5, 32(a0)
+; RV64-NEXT:    sd a6, 40(a0)
+; RV64-NEXT:    ret
+  %a = load <4 x i128>, ptr %p
+  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
+  store <4 x i128> %res, ptr %p
+  ret void
+}
+
+define void @shuffle_i256_ldst(ptr %p) {
+; RV32-LABEL: shuffle_i256_ldst:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -48
+; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    sw s0, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset s0, -4
+; RV32-NEXT:    .cfi_offset s1, -8
+; RV32-NEXT:    .cfi_offset s2, -12
+; RV32-NEXT:    .cfi_offset s3, -16
+; RV32-NEXT:    .cfi_offset s4, -20
+; RV32-NEXT:    .cfi_offset s5, -24
+; RV32-NEXT:    .cfi_offset s6, -28
+; RV32-NEXT:    .cfi_offset s7, -32
+; RV32-NEXT:    .cfi_offset s8, -36
+; RV32-NEXT:    .cfi_offset s9, -40
+; RV32-NEXT:    lw a1, 0(a0)
+; RV32-NEXT:    lw a2, 4(a0)
+; RV32-NEXT:    lw a3, 8(a0)
+; RV32-NEXT:    lw a4, 12(a0)
+; RV32-NEXT:    lw a5, 16(a0)
+; RV32-NEXT:    lw a6, 20(a0)
+; RV32-NEXT:    lw a7, 24(a0)
+; RV32-NEXT:    lw t0, 28(a0)
+; RV32-NEXT:    lw t1, 96(a0)
+; RV32-NEXT:    lw t2, 100(a0)
+; RV32-NEXT:    lw t3, 104(a0)
+; RV32-NEXT:    lw t4, 108(a0)
+; RV32-NEXT:    lw t5, 112(a0)
+; RV32-NEXT:    lw t6, 116(a0)
+; RV32-NEXT:    lw s0, 120(a0)
+; RV32-NEXT:    lw s1, 124(a0)
+; RV32-NEXT:    lw s2, 64(a0)
+; RV32-NEXT:    lw s3, 68(a0)
+; RV32-NEXT:    lw s4, 72(a0)
+; RV32-NEXT:    lw s5, 76(a0)
+; RV32-NEXT:    lw s6, 80(a0)
+; RV32-NEXT:    lw s7, 84(a0)
+; RV32-NEXT:    lw s8, 88(a0)
+; RV32-NEXT:    lw s9, 92(a0)
+; RV32-NEXT:    sw s6, 112(a0)
+; RV32-NEXT:    sw s7, 116(a0)
+; RV32-NEXT:    sw s8, 120(a0)
+; RV32-NEXT:    sw s9, 124(a0)
+; RV32-NEXT:    sw s2, 96(a0)
+; RV32-NEXT:    sw s3, 100(a0)
+; RV32-NEXT:    sw s4, 104(a0)
+; RV32-NEXT:    sw s5, 108(a0)
+; RV32-NEXT:    sw t5, 80(a0)
+; RV32-NEXT:    sw t6, 84(a0)
+; RV32-NEXT:    sw s0, 88(a0)
+; RV32-NEXT:    sw s1, 92(a0)
+; RV32-NEXT:    sw t1, 64(a0)
+; RV32-NEXT:    sw t2, 68(a0)
+; RV32-NEXT:    sw t3, 72(a0)
+; RV32-NEXT:    sw t4, 76(a0)
+; RV32-NEXT:    sw a5, 48(a0)
+; RV32-NEXT:    sw a6, 52(a0)
+; RV32-NEXT:    sw a7, 56(a0)
+; RV32-NEXT:    sw t0, 60(a0)
+; RV32-NEXT:    sw a1, 32(a0)
+; RV32-NEXT:    sw a2, 36(a0)
+; RV32-NEXT:    sw a3, 40(a0)
+; RV32-NEXT:    sw a4, 44(a0)
+; RV32-NEXT:    lw s0, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore s0
+; RV32-NEXT:    .cfi_restore s1
+; RV32-NEXT:    .cfi_restore s2
+; RV32-NEXT:    .cfi_restore s3
+; RV32-NEXT:    .cfi_restore s4
+; RV32-NEXT:    .cfi_restore s5
+; RV32-NEXT:    .cfi_restore s6
+; RV32-NEXT:    .cfi_restore s7
+; RV32-NEXT:    .cfi_restore s8
+; RV32-NEXT:    .cfi_restore s9
+; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: shuffle_i256_ldst:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ld a1, 96(a0)
+; RV64-NEXT:    ld a2, 104(a0)
+; RV64-NEXT:    ld a3, 112(a0)
+; RV64-NEXT:    ld a4, 120(a0)
+; RV64-NEXT:    ld a5, 0(a0)
+; RV64-NEXT:    ld a6, 8(a0)
+; RV64-NEXT:    ld a7, 16(a0)
+; RV64-NEXT:    ld t0, 24(a0)
+; RV64-NEXT:    ld t1, 64(a0)
+; RV64-NEXT:    ld t2, 72(a0)
+; RV64-NEXT:    ld t3, 80(a0)
+; RV64-NEXT:    ld t4, 88(a0)
+; RV64-NEXT:    sd t1, 96(a0)
+; RV64-NEXT:    sd t2, 104(a0)
+; RV64-NEXT:    sd t3, 112(a0)
+; RV64-NEXT:    sd t4, 120(a0)
+; RV64-NEXT:    sd a5, 32(a0)
+; RV64-NEXT:    sd a6, 40(a0)
+; RV64-NEXT:    sd a7, 48(a0)
+; RV64-NEXT:    sd t0, 56(a0)
+; RV64-NEXT:    sd a1, 64(a0)
+; RV64-NEXT:    sd a2, 72(a0)
+; RV64-NEXT:    sd a3, 80(a0)
+; RV64-NEXT:    sd a4, 88(a0)
+; RV64-NEXT:    ret
+  %a = load <4 x i256>, ptr %p
+  %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
+  store <4 x i256> %res, ptr %p
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index bb05eb5368ae9..4603c0d24f5d7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -400,3 +400,183 @@ entry:
   %conv199 = sext i32 %4 to i64
   ret i64 %conv199
 }
+
+define void @shuffle_i128_ldst(ptr %p) vscale_range(2,2) {
+; RV32-LABEL: shuffle_i128_ldst:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a1, 48(a0)
+; RV32-NEXT:    lw a2, 52(a0)
+; RV32-NEXT:    lw a3, 56(a0)
+; RV32-NEXT:    lw a4, 60(a0)
+; RV32-NEXT:    lw a5, 0(a0)
+; RV32-NEXT:    lw a6, 4(a0)
+; RV32-NEXT:    lw a7, 8(a0)
+; RV32-NEXT:    lw t0, 12(a0)
+; RV32-NEXT:    lw t1, 32(a0)
+; RV32-NEXT:    lw t2, 36(a0)
+; RV32-NEXT:    lw t3, 40(a0)
+; RV32-NEXT:    lw t4, 44(a0)
+; RV32-NEXT:    sw t1, 48(a0)
+; RV32-NEXT:    sw t2, 52(a0)
+; RV32-NEXT:    sw t3, 56(a0)
+; RV32-NEXT:    sw t4, 60(a0)
+; RV32-NEXT:    sw a5, 16(a0)
+; RV32-NEXT:    sw a6, 20(a0)
+; RV32-NEXT:    sw a7, 24(a0)
+; RV32-NEXT:    sw t0, 28(a0)
+; RV32-NEXT:    sw a1, 32(a0)
+; RV32-NEXT:    sw a2, 36(a0)
+; RV32-NEXT:    sw a3, 40(a0)
+; RV32-NEXT:    sw a4, 44(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: shuffle_i128_ldst:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ld a1, 0(a0)
+; RV64-NEXT:    ld a2, 8(a0)
+; RV64-NEXT:    ld a3, 32(a0)
+; RV64-NEXT:    ld a4, 40(a0)
+; RV64-NEXT:    ld a5, 48(a0)
+; RV64-NEXT:    ld a6, 56(a0)
+; RV64-NEXT:    sd a3, 48(a0)
+; RV64-NEXT:    sd a4, 56(a0)
+; RV64-NEXT:    sd a1, 16(a0)
+; RV64-NEXT:    sd a2, 24(a0)
+; RV64-NEXT:    sd a5, 32(a0)
+; RV64-NEXT:    sd a6, 40(a0)
+; RV64-NEXT:    ret
+  %a = load <4 x i128>, ptr %p
+  %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
+  store <4 x i128> %res, ptr %p
+  ret void
+}
+
+define void @shuffle_i256_ldst(ptr %p) vscale_range(2,2) {
+; RV32-LABEL: shuffle_i256_ldst:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -48
+; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    sw s0, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset s0, -4
+; RV32-NEXT:    .cfi_offset s1, -8
+; RV32-NEXT:    .cfi_offset s2, -12
+; RV32-NEXT:    .cfi_offset s3, -16
+; RV32-NEXT:    .cfi_offset s4, -20
+; RV32-NEXT:    .cfi_offset s5, -24
+; RV32-NEXT:    .cfi_offset s6, -28
+; RV32-NEXT:    .cfi_offset s7, -32
+; RV32-NEXT:    .cfi_offset s8, -36
+; RV32-NEXT:    .cfi_offset s9, -40
+; RV32-NEXT:    lw a1, 0(a0)
+; RV32-NEXT:    lw a2, 4(a0)
+; RV32-NEXT:    lw a3, 8(a0)
+; RV32-NEXT:    lw a4, 12(a0)
+; RV32-NEXT:    lw a5, 16(a0)
+; RV32-NEXT:    lw a6, 20(a0)
+; RV32-NEXT:    lw a7, 24(a0)
+; RV32-NEXT:    lw t0, 28(a0)
+; RV32-NEXT:    lw t1, 96(a0)
+; RV32-NEXT:    lw t2, 100(a0)
+; RV32-NEXT:    lw t3, 104(a0)
+; RV32-NEXT:    lw t4, 108(a0)
+; RV32-NEXT:    lw t5, 112(a0)
+; RV32-NEXT:    lw t6, 116(a0)
+; RV32-NEXT:    lw s0, 120(a0)
+; RV32-NEXT:    lw s1, 124(a0)
+; RV32-NEXT:    lw s2, 64(a0)
+; RV32-NEXT:    lw s3, 68(a0)
+; RV32-NEXT:    lw s4, 72(a0)
+; RV32-NEXT:    lw s5, 76(a0)
+; RV32-NEXT:    lw s6, 80(a0)
+; RV32-NEXT:    lw s7, 84(a0)
+; RV32-NEXT:    lw s8, 88(a0)
+; RV32-NEXT:    lw s9, 92(a0)
+; RV32-NEXT:    sw s6, 112(a0)
+; RV32-NEXT:    sw s7, 116(a0)
+; RV32-NEXT:    sw s8, 120(a0)
+; RV32-NEXT:    sw s9, 124(a0)
+; RV32-NEXT:    sw s2, 96(a0)
+; RV32-NEXT:    sw s3, 100(a0)
+; RV32-NEXT:    sw s4, 104(a0)
+; RV32-NEXT:    sw s5, 108(a0)
+; RV32-NEXT:    sw t5, 80(a0)
+; RV32-NEXT:    sw t6, 84(a0)
+; RV32-NEXT:    sw s0, 88(a0)
+; RV32-NEXT:    sw s1, 92(a0)
+; RV32-NEXT:    sw t1, 64(a0)
+; RV32-NEXT:    sw t2, 68(a0)
+; RV32-NEXT:    sw t3, 72(a0)
+; RV32-NEXT:    sw t4, 76(a0)
+; RV32-NEXT:    sw a5, 48(a0)
+; RV32-NEXT:    sw a6, 52(a0)
+; RV32-NEXT:    sw a7, 56(a0)
+; RV32-NEXT:    sw t0, 60(a0)
+; RV32-NEXT:    sw a1, 32(a0)
+; RV32-NEXT:    sw a2, 36(a0)
+; RV32-NEXT:    sw a3, 40(a0)
+; RV32-NEXT:    sw a4, 44(a0)
+; RV32-NEXT:    lw s0, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore s0
+; RV32-NEXT:    .cfi_restore s1
+; RV32-NEXT:    .cfi_restore s2
+; RV32-NEXT:    .cfi_restore s3
+; RV32-NEXT:    .cfi_restore s4
+; RV32-NEXT:    .cfi_restore s5
+; RV32-NEXT:    .cfi_restore s6
+; RV32-NEXT:    .cfi_restore s7
+; RV32-NEXT:    .cfi_restore s8
+; RV32-NEXT:    .cfi_restore s9
+; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: shuffle_i256_ldst:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ld a1, 96(a0)
+; RV64-NEXT:    ld a2, 104(a0)
+; RV64-NEXT:    ld a3, 112(a0)
+; RV64-NEXT:    ld a4, 120(a0)
+; RV64-NEXT:    ld a5, 0(a0)
+; RV64-NEXT:    ld a6, 8(a0)
+; RV64-NEXT:    ld a7, 16(a0)
+; RV64-NEXT:    ld t0, 24(a0)
+; RV64-NEXT:    ld t1, 64(a0)
+; RV64-NEXT:    ld t2, 72(a0)
+; RV64-NEXT:    ld t3, 80(a0)
+; RV64-NEXT:    ld t4, 88(a0)
+; RV64-NEXT:    sd t1, 96(a0)
+; RV64-NEXT:    sd t2, 104(a0)
+; RV64-NEXT:    sd t3, 112(a0)
+; RV64-NEXT:    sd t4, 120(a0)
+; RV64-NEXT:    sd a5, 32(a0)
+; RV64-NEXT:    sd a6, 40(a0)
+; RV64-NEXT:    sd a7, 48(a0)
+; RV64-NEXT:    sd t0, 56(a0)
+; RV64-NEXT:    sd a1, 64(a0)
+; RV64-NEXT:    sd a2, 72(a0)
+; RV64-NEXT:    sd a3, 80(a0)
+; RV64-NEXT:    sd a4, 88(a0)
+; RV64-NEXT:    ret
+  %a = load <4 x i256>, ptr %p
+  %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 2>
+  store <4 x i256> %res, ptr %p
+  ret void
+}

From c036a9a2c2130f6e80e2969d4a8fad96265e4a11 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Thu, 9 Jan 2025 07:56:56 -0800
Subject: [PATCH 30/79] [RISCV][VLOPT] Add vector single width floating point
 add subtract instructions to isSupportedInstr

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    |   6 ++
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     |  26 ++---
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll  | 100 ++++++++++++++++++
 3 files changed, 116 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 2c04dd062670f..23a1cf392ec1e 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -983,6 +983,12 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VMSOF_M:
   case RISCV::VIOTA_M:
   case RISCV::VID_V:
+  // Vector Single-Width Floating-Point Add/Subtract Instructions
+  case RISCV::VFADD_VF:
+  case RISCV::VFADD_VV:
+  case RISCV::VFSUB_VF:
+  case RISCV::VFSUB_VV:
+  case RISCV::VFRSUB_VF:
   // Single-Width Floating-Point/Integer Type-Convert Instructions
   case RISCV::VFCVT_XU_F_V:
   case RISCV::VFCVT_X_F_V:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 66952cac8e00d..ce23dd0eac203 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -93,12 +93,11 @@ define void @fadd_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -229,12 +228,11 @@ define void @fsub_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -2330,13 +2328,12 @@ define void @fadd_vf_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -2472,13 +2469,12 @@ define void @fadd_fv_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -2614,13 +2610,12 @@ define void @fsub_vf_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -2756,13 +2751,12 @@ define void @fsub_fv_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -5004,13 +4998,13 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -5181,13 +5175,13 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 55a50a15c788c..a53b837db5461 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -2925,3 +2925,103 @@ define <vscale x 4 x i32> @vid.v(<vscale x 4 x i32> %c, iXLen %vl) {
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %c, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
+
+define <vscale x 4 x float> @vfadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfadd_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfadd_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfadd_vx(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfadd_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfadd_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vf v10, v8, fa0
+; VLOPT-NEXT:    vfadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfsub_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfsub.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfsub_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfsub.vv v8, v8, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfsub_vx(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfsub_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfsub.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfsub_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfsub.vf v10, v8, fa0
+; VLOPT-NEXT:    vfadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfrsub_vx(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfrsub_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfrsub.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfrsub_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfrsub.vf v10, v8, fa0
+; VLOPT-NEXT:    vfadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}

From 8beb9d393deaafde5ce7323a71cd0b0b1553444d Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Thu, 9 Jan 2025 08:22:16 -0800
Subject: [PATCH 31/79] [RISCV][VLOPT] Add vector widening floating point add
 subtract instructions to isSupportedInstr

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp   |   9 +
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 168 +++++++++++++++++++
 2 files changed, 177 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 23a1cf392ec1e..018556d27d763 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -989,6 +989,15 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VFSUB_VF:
   case RISCV::VFSUB_VV:
   case RISCV::VFRSUB_VF:
+  // Vector Widening Floating-Point Add/Subtract Instructions
+  case RISCV::VFWADD_VV:
+  case RISCV::VFWADD_VF:
+  case RISCV::VFWSUB_VV:
+  case RISCV::VFWSUB_VF:
+  case RISCV::VFWADD_WF:
+  case RISCV::VFWADD_WV:
+  case RISCV::VFWSUB_WF:
+  case RISCV::VFWSUB_WV:
   // Single-Width Floating-Point/Integer Type-Convert Instructions
   case RISCV::VFCVT_XU_F_V:
   case RISCV::VFCVT_X_F_V:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index a53b837db5461..804a8a614a820 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3025,3 +3025,171 @@ define <vscale x 4 x float> @vfrsub_vx(<vscale x 4 x float> %a, float %b, iXLen
   %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
   ret <vscale x 4 x float> %2
 }
+
+define <vscale x 4 x double> @vfwadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfwadd_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwadd.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwadd_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfwadd.vv v12, v8, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwadd_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfwadd_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwadd.vf v12, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwadd_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfwadd.vf v12, v8, fa0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfwsub_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwsub.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwsub_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfwsub.vv v12, v8, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwsub_vx(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfwsub_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwsub.vf v12, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwsub_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfwsub.vf v12, v8, fa0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwadd_wv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfwadd_wv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwadd.wv v8, v8, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwadd_wv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfwadd.wv v8, v8, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32.nxv4f32(<vscale x 4 x double> poison, <vscale x 4 x double> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwadd_wf(<vscale x 4 x double> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfwadd_wf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwadd.wf v8, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwadd_wf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfwadd.wf v8, v8, fa0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32.f32(<vscale x 4 x double> poison, <vscale x 4 x double> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwsub_wv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfwsub_wv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwsub.wv v8, v8, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwsub_wv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfwsub.wv v8, v8, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32.nxv4f32(<vscale x 4 x double> poison, <vscale x 4 x double> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwsub_wf(<vscale x 4 x double> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfwsub_wf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwsub.wf v8, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwsub_wf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfwsub.wf v8, v8, fa0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32.f32(<vscale x 4 x double> poison, <vscale x 4 x double> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}

From a484fa1d0a974680d609d3f08ee6d57c9e3d21f8 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Thu, 9 Jan 2025 08:28:59 -0800
Subject: [PATCH 32/79] [RISCV][VLOPT] Add floating point multiply divide
 instructions to getSupportedInstr

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    |   6 ++
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     |  24 ++---
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll  | 100 ++++++++++++++++++
 3 files changed, 114 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 018556d27d763..bf6a8068413ac 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -998,6 +998,12 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VFWADD_WV:
   case RISCV::VFWSUB_WF:
   case RISCV::VFWSUB_WV:
+  // Vector Single-Width Floating-Point Multiply/Divide Instructions
+  case RISCV::VFMUL_VF:
+  case RISCV::VFMUL_VV:
+  case RISCV::VFDIV_VF:
+  case RISCV::VFDIV_VV:
+  case RISCV::VFRDIV_VF:
   // Single-Width Floating-Point/Integer Type-Convert Instructions
   case RISCV::VFCVT_XU_F_V:
   case RISCV::VFCVT_X_F_V:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index ce23dd0eac203..b8710a518287a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -363,12 +363,11 @@ define void @fmul_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -499,12 +498,11 @@ define void @fdiv_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -2892,13 +2890,12 @@ define void @fmul_vf_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -3034,13 +3031,12 @@ define void @fmul_fv_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -3176,13 +3172,12 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -3318,13 +3313,12 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v9, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -4993,12 +4987,11 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v10, (a2)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
@@ -5170,12 +5163,11 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v10, (a2)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 804a8a614a820..bdacd17fe75c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3193,3 +3193,103 @@ define <vscale x 4 x double> @vfwsub_wf(<vscale x 4 x double> %a, float %b, iXLe
   %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
   ret <vscale x 4 x double> %2
 }
+
+define <vscale x 4 x float> @vfmul_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfmul_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmul_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfmul.vv v8, v8, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmul_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfmul_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfmul.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmul_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfmul.vf v10, v8, fa0
+; VLOPT-NEXT:    vfadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfdiv_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfdiv_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfdiv.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfdiv_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfdiv.vv v8, v8, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfdiv_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfdiv_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfdiv.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfdiv_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfdiv.vf v10, v8, fa0
+; VLOPT-NEXT:    vfadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfrdiv_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfrdiv_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfrdiv.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfrdiv_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfrdiv.vf v10, v8, fa0
+; VLOPT-NEXT:    vfadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}

From b419edeec34844cfc31665248d670d777499340d Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Thu, 9 Jan 2025 09:55:05 -0800
Subject: [PATCH 33/79] [RISCV][VLOPT] Add widening floating point multiply to
 isSupportedInstr

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp   |  3 ++
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 42 ++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index bf6a8068413ac..bc7d68fad22d0 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1004,6 +1004,9 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VFDIV_VF:
   case RISCV::VFDIV_VV:
   case RISCV::VFRDIV_VF:
+  // Vector Widening Floating-Point Multiply
+  case RISCV::VFWMUL_VF:
+  case RISCV::VFWMUL_VV:
   // Single-Width Floating-Point/Integer Type-Convert Instructions
   case RISCV::VFCVT_XU_F_V:
   case RISCV::VFCVT_X_F_V:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index bdacd17fe75c0..46cca43a1be89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3293,3 +3293,45 @@ define <vscale x 4 x float> @vfrdiv_vf(<vscale x 4 x float> %a, float %b, iXLen
   %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
   ret <vscale x 4 x float> %2
 }
+
+define <vscale x 4 x double> @vfwmul_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmul_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwmul.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwmul_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfwmul.vv v12, v8, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwmul_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmul_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwmul.vf v12, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v12, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwmul_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfwmul.vf v12, v8, fa0
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v12, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32(<vscale x 4 x double> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}

From 5f70fea79fdb947ab7820aa47e5a1885691ef73a Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Thu, 9 Jan 2025 10:01:15 -0800
Subject: [PATCH 34/79] [RISCV][VLOPT] Add Vector Floating-Point Compare
 Instructions to getSupportedInstr

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp   |  11 +
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 200 +++++++++++++++++++
 2 files changed, 211 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index bc7d68fad22d0..6661921d66f95 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1007,6 +1007,17 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   // Vector Widening Floating-Point Multiply
   case RISCV::VFWMUL_VF:
   case RISCV::VFWMUL_VV:
+  // Vector Floating-Point Compare Instructions
+  case RISCV::VMFEQ_VF:
+  case RISCV::VMFEQ_VV:
+  case RISCV::VMFNE_VF:
+  case RISCV::VMFNE_VV:
+  case RISCV::VMFLT_VF:
+  case RISCV::VMFLT_VV:
+  case RISCV::VMFLE_VF:
+  case RISCV::VMFLE_VV:
+  case RISCV::VMFGT_VF:
+  case RISCV::VMFGE_VF:
   // Single-Width Floating-Point/Integer Type-Convert Instructions
   case RISCV::VFCVT_XU_F_V:
   case RISCV::VFCVT_X_F_V:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 46cca43a1be89..46fbba35c35a2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3335,3 +3335,203 @@ define <vscale x 4 x double> @vfwmul_vf(<vscale x 4 x float> %a, float %b, iXLen
   %2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
   ret <vscale x 4 x double> %2
 }
+
+define <vscale x 4 x i1> @vmfeq_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, float%c, iXLen %vl) {
+; NOVLOPT-LABEL: vmfeq_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmfeq.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmfeq_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmfeq.vf v10, v8, fa0
+; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
+  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
+  ret <vscale x 4 x i1> %2
+}
+
+define <vscale x 4 x i1> @vmfeq_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmfeq_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmfeq.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmfeq_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmfeq.vv v12, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
+  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
+  ret <vscale x 4 x i1> %2
+}
+
+define <vscale x 4 x i1> @vmfne_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, float%c, iXLen %vl) {
+; NOVLOPT-LABEL: vmfne_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmfne.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmfne_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmfne.vf v10, v8, fa0
+; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
+  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
+  ret <vscale x 4 x i1> %2
+}
+
+define <vscale x 4 x i1> @vmfne_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmfne_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmfne.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmfne_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmfne.vv v12, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
+  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
+  ret <vscale x 4 x i1> %2
+}
+
+define <vscale x 4 x i1> @vmflt_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, float%c, iXLen %vl) {
+; NOVLOPT-LABEL: vmflt_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmflt.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmflt_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmflt.vf v10, v8, fa0
+; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
+  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
+  ret <vscale x 4 x i1> %2
+}
+
+define <vscale x 4 x i1> @vmflt_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmflt_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmflt.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmflt_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmflt.vv v12, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
+  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
+  ret <vscale x 4 x i1> %2
+}
+
+define <vscale x 4 x i1> @vmfle_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, float%c, iXLen %vl) {
+; NOVLOPT-LABEL: vmfle_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmfle.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmfle_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmfle.vf v10, v8, fa0
+; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
+  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
+  ret <vscale x 4 x i1> %2
+}
+
+define <vscale x 4 x i1> @vmfle_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmfle_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmfle.vv v12, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmfle_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmfle.vv v12, v8, v10
+; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
+  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
+  ret <vscale x 4 x i1> %2
+}
+
+define <vscale x 4 x i1> @vmfgt_vf(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, float%c, iXLen %vl) {
+; NOVLOPT-LABEL: vmfgt_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmfgt.vf v10, v8, fa0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; NOVLOPT-NEXT:    vmand.mm v0, v10, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmfgt_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmfgt.vf v10, v8, fa0
+; VLOPT-NEXT:    vmand.mm v0, v10, v0
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.f32(<vscale x 4 x float> %a, float %c, iXLen -1)
+  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
+  ret <vscale x 4 x i1> %2
+}
+
+define <vscale x 4 x i1> @vmfgt_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmfgt_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmflt.vv v12, v10, v8
+; NOVLOPT-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; NOVLOPT-NEXT:    vmand.mm v0, v12, v0
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmfgt_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmflt.vv v12, v10, v8
+; VLOPT-NEXT:    vmand.mm v0, v12, v0
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %c, iXLen -1)
+  %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
+  ret <vscale x 4 x i1> %2
+}

From ca10deaa72c37fb5eee8bf1c95466b12a8773e95 Mon Sep 17 00:00:00 2001
From: Kai Nacke <kai.peter.nacke@ibm.com>
Date: Thu, 9 Jan 2025 13:53:59 -0500
Subject: [PATCH 35/79] [z/OS] Add backtrace support for z/OS. (#121826)

The system call `__CELQTBCK()` is used to build a backtrace like
on other systems. The collected information are the address of the PC,
the address of the entry point (EP), the difference between both
addresses (+EP), the dynamic storage area (DSA aka the stack
pointer), and the function name.
The system call is described here:

https://www.ibm.com/docs/en/zos/3.1.0?topic=cwicsa6a-celqtbck-also-known-as-celqtbck-64-bit-traceback-service
---
 llvm/lib/Support/Unix/Signals.inc | 78 +++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 088ca33e3c8c5..50d6248ba6af8 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -79,6 +79,10 @@
 #undef HAVE__UNWIND_BACKTRACE
 #endif
 #endif
+#if ENABLE_BACKTRACES && defined(__MVS__)
+#include "llvm/Support/ConvertEBCDIC.h"
+#include <__le_cwi.h>
+#endif
 
 using namespace llvm;
 
@@ -708,6 +712,76 @@ static int unwindBacktrace(void **StackTrace, int MaxEntries) {
 }
 #endif
 
+#if ENABLE_BACKTRACES && defined(__MVS__)
+static void zosbacktrace(raw_ostream &OS) {
+  // A function name in the PPA1 can have length 16k.
+  constexpr size_t MAX_ENTRY_NAME = UINT16_MAX;
+  // Limit all other strings to 8 byte.
+  constexpr size_t MAX_OTHER = 8;
+  int32_t dsa_format = -1;                  // Input/Output
+  void *caaptr = _gtca();                   // Input
+  int32_t member_id;                        // Output
+  char compile_unit_name[MAX_OTHER];        // Output
+  void *compile_unit_address;               // Output
+  void *call_instruction_address = nullptr; // Input/Output
+  char entry_name[MAX_ENTRY_NAME];          // Output
+  void *entry_address;                      // Output
+  void *callers_instruction_address;        // Output
+  void *callers_dsaptr;                     // Output
+  int32_t callers_dsa_format;               // Output
+  char statement_id[MAX_OTHER];             // Output
+  void *cibptr;                             // Output
+  int32_t main_program;                     // Output
+  _FEEDBACK fc;                             // Output
+
+  // The DSA pointer is the value of the stack pointer r4.
+  // __builtin_frame_address() returns a pointer to the stack frame, so the
+  // stack bias has to be considered to get the expected DSA value.
+  void *dsaptr = static_cast<char *>(__builtin_frame_address(0)) - 2048;
+  int count = 0;
+  OS << " DSA  Adr                EP                 +EP         DSA           "
+        "     Entry\n";
+  while (1) {
+    // After the call, these variables contain the length of the string.
+    int32_t compile_unit_name_length = sizeof(compile_unit_name);
+    int32_t entry_name_length = sizeof(entry_name);
+    int32_t statement_id_length = sizeof(statement_id);
+    // See
+    // https://www.ibm.com/docs/en/zos/3.1.0?topic=cwicsa6a-celqtbck-also-known-as-celqtbck-64-bit-traceback-service
+    // for documentation of the parameters.
+    __CELQTBCK(&dsaptr, &dsa_format, &caaptr, &member_id, &compile_unit_name[0],
+               &compile_unit_name_length, &compile_unit_address,
+               &call_instruction_address, &entry_name[0], &entry_name_length,
+               &entry_address, &callers_instruction_address, &callers_dsaptr,
+               &callers_dsa_format, &statement_id[0], &statement_id_length,
+               &cibptr, &main_program, &fc);
+    if (fc.tok_sev) {
+      OS << format("error: CELQTBCK returned severity %d message %d\n",
+                   fc.tok_sev, fc.tok_msgno);
+      break;
+    }
+
+    if (count) { // Omit first entry.
+      uintptr_t diff = reinterpret_cast<uintptr_t>(call_instruction_address) -
+                       reinterpret_cast<uintptr_t>(entry_address);
+      OS << format(" %3d. 0x%016lX", count, call_instruction_address);
+      OS << format(" 0x%016lX +0x%08lX 0x%016lX", entry_address, diff, dsaptr);
+      SmallString<256> Str;
+      ConverterEBCDIC::convertToUTF8(StringRef(entry_name, entry_name_length),
+                                     Str);
+      OS << ' ' << Str << '\n';
+    }
+    ++count;
+    if (callers_dsaptr) {
+      dsaptr = callers_dsaptr;
+      dsa_format = callers_dsa_format;
+      call_instruction_address = callers_instruction_address;
+    } else
+      break;
+  }
+}
+#endif
+
 // In the case of a program crash or fault, print out a stack trace so that the
 // user has an indication of why and where we died.
 //
@@ -715,6 +789,9 @@ static int unwindBacktrace(void **StackTrace, int MaxEntries) {
 // doesn't demangle symbols.
 void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
 #if ENABLE_BACKTRACES
+#ifdef __MVS__
+  zosbacktrace(OS);
+#else
   static void *StackTrace[256];
   int depth = 0;
 #if defined(HAVE_BACKTRACE)
@@ -791,6 +868,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
   backtrace_symbols_fd(StackTrace, Depth, STDERR_FILENO);
 #endif
 #endif
+#endif
 }
 
 static void PrintStackTraceSignalHandler(void *) {

From c123d0c1df3363f309a2c0b7837297d3790d6ea5 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas@microsoft.com>
Date: Thu, 9 Jan 2025 11:01:44 -0800
Subject: [PATCH 36/79] [HLSL][NFC] Move packoffset validation to separate
 function and calculate offsets in bytes (#121989)

There will be more changes coming in to `SemaHLSL::ActOnFinishBuffer` so
it would be good to move the packoffset validation out to a separate
function. This change also unifies the units for cbuffer offset
calculations to bytes.
---
 clang/include/clang/Basic/Attr.td |  6 +--
 clang/lib/Sema/SemaHLSL.cpp       | 80 ++++++++++++++++++-------------
 2 files changed, 49 insertions(+), 37 deletions(-)

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 12faf06597008..e51a74655dd5e 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -4711,9 +4711,9 @@ def HLSLPackOffset: HLSLAnnotationAttr {
   let Args = [IntArgument<"Subcomponent">, IntArgument<"Component">];
   let Documentation = [HLSLPackOffsetDocs];
   let AdditionalMembers = [{
-      unsigned getOffset() {
-        return subcomponent * 4 + component;
-      }
+  unsigned getOffsetInBytes() {
+    return subcomponent * 16 + component * 4;
+  }
   }];
 }
 
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 64b6fe4cd5eb4..65ddee05a2151 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -164,18 +164,20 @@ Decl *SemaHLSL::ActOnStartBuffer(Scope *BufferScope, bool CBuffer,
   return Result;
 }
 
-// Calculate the size of a legacy cbuffer type based on
+// Calculate the size of a legacy cbuffer type in bytes based on
 // https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules
 static unsigned calculateLegacyCbufferSize(const ASTContext &Context,
                                            QualType T) {
   unsigned Size = 0;
-  constexpr unsigned CBufferAlign = 128;
+  constexpr unsigned CBufferAlign = 16;
   if (const RecordType *RT = T->getAs<RecordType>()) {
     const RecordDecl *RD = RT->getDecl();
     for (const FieldDecl *Field : RD->fields()) {
       QualType Ty = Field->getType();
       unsigned FieldSize = calculateLegacyCbufferSize(Context, Ty);
-      unsigned FieldAlign = 32;
+      // FIXME: This is not the correct alignment, it does not work for 16-bit
+      // types. See llvm/llvm-project#119641.
+      unsigned FieldAlign = 4;
       if (Ty->isAggregateType())
         FieldAlign = CBufferAlign;
       Size = llvm::alignTo(Size, FieldAlign);
@@ -194,17 +196,19 @@ static unsigned calculateLegacyCbufferSize(const ASTContext &Context,
         calculateLegacyCbufferSize(Context, VT->getElementType());
     Size = ElementSize * ElementCount;
   } else {
-    Size = Context.getTypeSize(T);
+    Size = Context.getTypeSize(T) / 8;
   }
   return Size;
 }
 
-void SemaHLSL::ActOnFinishBuffer(Decl *Dcl, SourceLocation RBrace) {
-  auto *BufDecl = cast<HLSLBufferDecl>(Dcl);
-  BufDecl->setRBraceLoc(RBrace);
-
-  // Validate packoffset.
+// Validate packoffset:
+// - if packoffset it used it must be set on all declarations inside the buffer
+// - packoffset ranges must not overlap
+static void validatePackoffset(Sema &S, HLSLBufferDecl *BufDecl) {
   llvm::SmallVector<std::pair<VarDecl *, HLSLPackOffsetAttr *>> PackOffsetVec;
+
+  // Make sure the packoffset annotations are either on all declarations
+  // or on none.
   bool HasPackOffset = false;
   bool HasNonPackOffset = false;
   for (auto *Field : BufDecl->decls()) {
@@ -219,33 +223,41 @@ void SemaHLSL::ActOnFinishBuffer(Decl *Dcl, SourceLocation RBrace) {
     }
   }
 
-  if (HasPackOffset && HasNonPackOffset)
-    Diag(BufDecl->getLocation(), diag::warn_hlsl_packoffset_mix);
-
-  if (HasPackOffset) {
-    ASTContext &Context = getASTContext();
-    // Make sure no overlap in packoffset.
-    // Sort PackOffsetVec by offset.
-    std::sort(PackOffsetVec.begin(), PackOffsetVec.end(),
-              [](const std::pair<VarDecl *, HLSLPackOffsetAttr *> &LHS,
-                 const std::pair<VarDecl *, HLSLPackOffsetAttr *> &RHS) {
-                return LHS.second->getOffset() < RHS.second->getOffset();
-              });
-
-    for (unsigned i = 0; i < PackOffsetVec.size() - 1; i++) {
-      VarDecl *Var = PackOffsetVec[i].first;
-      HLSLPackOffsetAttr *Attr = PackOffsetVec[i].second;
-      unsigned Size = calculateLegacyCbufferSize(Context, Var->getType());
-      unsigned Begin = Attr->getOffset() * 32;
-      unsigned End = Begin + Size;
-      unsigned NextBegin = PackOffsetVec[i + 1].second->getOffset() * 32;
-      if (End > NextBegin) {
-        VarDecl *NextVar = PackOffsetVec[i + 1].first;
-        Diag(NextVar->getLocation(), diag::err_hlsl_packoffset_overlap)
-            << NextVar << Var;
-      }
+  if (!HasPackOffset)
+    return;
+
+  if (HasNonPackOffset)
+    S.Diag(BufDecl->getLocation(), diag::warn_hlsl_packoffset_mix);
+
+  // Make sure there is no overlap in packoffset - sort PackOffsetVec by offset
+  // and compare adjacent values.
+  ASTContext &Context = S.getASTContext();
+  std::sort(PackOffsetVec.begin(), PackOffsetVec.end(),
+            [](const std::pair<VarDecl *, HLSLPackOffsetAttr *> &LHS,
+               const std::pair<VarDecl *, HLSLPackOffsetAttr *> &RHS) {
+              return LHS.second->getOffsetInBytes() <
+                     RHS.second->getOffsetInBytes();
+            });
+  for (unsigned i = 0; i < PackOffsetVec.size() - 1; i++) {
+    VarDecl *Var = PackOffsetVec[i].first;
+    HLSLPackOffsetAttr *Attr = PackOffsetVec[i].second;
+    unsigned Size = calculateLegacyCbufferSize(Context, Var->getType());
+    unsigned Begin = Attr->getOffsetInBytes();
+    unsigned End = Begin + Size;
+    unsigned NextBegin = PackOffsetVec[i + 1].second->getOffsetInBytes();
+    if (End > NextBegin) {
+      VarDecl *NextVar = PackOffsetVec[i + 1].first;
+      S.Diag(NextVar->getLocation(), diag::err_hlsl_packoffset_overlap)
+          << NextVar << Var;
     }
   }
+}
+
+void SemaHLSL::ActOnFinishBuffer(Decl *Dcl, SourceLocation RBrace) {
+  auto *BufDecl = cast<HLSLBufferDecl>(Dcl);
+  BufDecl->setRBraceLoc(RBrace);
+
+  validatePackoffset(SemaRef, BufDecl);
 
   SemaRef.PopDeclContext();
 }

From 1c999072221371c9008cd0aec80e387777378de0 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu@amd.com>
Date: Thu, 9 Jan 2025 14:05:27 -0500
Subject: [PATCH 37/79] [CUDA][HIP] Fix overriding of constexpr virtual
 function (#121986)

In C++20 constexpr virtual function is allowed. In C++17 although
non-pure virtual function is not allowed to be constexpr, pure virtual
function is allowed to be constexpr and is allowed to be overriden by
non-constexpr virtual function in the derived class.

The following code compiles as C++:

```
class A
{
public:
    constexpr virtual int f() = 0;
};

class B : public A
{
public:
    int f() override
    {
        return 42;
    }
};
```

However, it fails to compile as CUDA or HIP code. The reason: A::f() is
implicitly host device function whereas B::f() is a host function. Since
they have different targets, clang does not treat B::f() as an override
of A::f(). Instead, it treats B::f() as a name-hiding non-virtual
function for A::f(), and diagnoses it.

This causes any CUDA/HIP program using C++ standard header file
`<format>` from g++-13 to fail to compile since such usage patten show
up there:

```
/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/format:3564:34: error: non-virtual member function marked 'override' hides virtual member function
 3564 |       _M_format_arg(size_t __id) override
      |                                  ^
/usr/lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/format:3538:30: note: hidden overloaded virtual function 'std::__format::_Scanner<char>::_M_format_arg' declared here
 3538 |       constexpr virtual void _M_format_arg(size_t __id) = 0;
      |                              ^
```

This is a serious issue and there is no workaround.

This patch allows non-constexpr function to override constexpr virtual
function for CUDA and HIP. This should be OK since non-constexpr
function without explicit host or device attribute can only be called in
host functions.

Fixes: SWDEV-507350
---
 clang/docs/ReleaseNotes.rst                   |  1 +
 clang/lib/Sema/SemaOverload.cpp               | 21 +++++++++++++-
 clang/test/SemaCUDA/constexpr-virtual-func.cu | 28 +++++++++++++++++++
 3 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/SemaCUDA/constexpr-virtual-func.cu

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c50260b548851..5ac886856c539 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1058,6 +1058,7 @@ RISC-V Support
 
 CUDA/HIP Language Changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^
+- Fixed a bug about overriding a constexpr pure-virtual member function with a non-constexpr virtual member function which causes compilation failure when including standard C++ header `format`.
 
 CUDA Support
 ^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 7589701fb81de..3be9ade80f1d9 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1309,6 +1309,13 @@ Sema::CheckOverload(Scope *S, FunctionDecl *New, const LookupResult &Old,
   return Ovl_Overload;
 }
 
+template <typename AttrT> static bool hasExplicitAttr(const FunctionDecl *D) {
+  assert(D && "function decl should not be null");
+  if (auto *A = D->getAttr<AttrT>())
+    return !A->isImplicit();
+  return false;
+}
+
 static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New,
                                      FunctionDecl *Old,
                                      bool UseMemberUsingDeclRules,
@@ -1583,6 +1590,7 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New,
       return true;
   }
 
+  // At this point, it is known that the two functions have the same signature.
   if (SemaRef.getLangOpts().CUDA && ConsiderCudaAttrs) {
     // Don't allow overloading of destructors.  (In theory we could, but it
     // would be a giant change to clang.)
@@ -1595,8 +1603,19 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New,
 
         // Allow overloading of functions with same signature and different CUDA
         // target attributes.
-        if (NewTarget != OldTarget)
+        if (NewTarget != OldTarget) {
+          // Special case: non-constexpr function is allowed to override
+          // constexpr virtual function
+          if (OldMethod && NewMethod && OldMethod->isVirtual() &&
+              OldMethod->isConstexpr() && !NewMethod->isConstexpr() &&
+              !hasExplicitAttr<CUDAHostAttr>(Old) &&
+              !hasExplicitAttr<CUDADeviceAttr>(Old) &&
+              !hasExplicitAttr<CUDAHostAttr>(New) &&
+              !hasExplicitAttr<CUDADeviceAttr>(New)) {
+            return false;
+          }
           return true;
+        }
       }
     }
   }
diff --git a/clang/test/SemaCUDA/constexpr-virtual-func.cu b/clang/test/SemaCUDA/constexpr-virtual-func.cu
new file mode 100644
index 0000000000000..89d909181cd94
--- /dev/null
+++ b/clang/test/SemaCUDA/constexpr-virtual-func.cu
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only \
+// RUN:            -fcuda-is-device %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only %s
+
+// expected-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+class A
+{
+public:
+    constexpr virtual int f() = 0;
+};
+
+class B : public A
+{
+public:
+    int f() override
+    {
+        return 42;
+    }
+};
+
+int test()
+{
+    B b;
+    return b.f();
+}

From 9ec92873ecc1c268a1d05e36b7b52e378860ea5b Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Thu, 9 Jan 2025 14:19:03 -0500
Subject: [PATCH 38/79] Revert "[HLSL] Move length support out of the DirectX
 Backend  (#121611)"

This reverts commit a6b7181733c83523a39d4f4e788c6b7a227d477d.
Breaks Clang :: CodeGenHLSL/builtins/length.hlsl, see
https://github.com/llvm/llvm-project/pull/121611#issuecomment-2581004278
---
 clang/include/clang/Basic/Builtins.td         |   6 +
 clang/lib/CodeGen/CGBuiltin.cpp               |  14 ++
 clang/lib/CodeGen/CGHLSLRuntime.h             |   1 +
 clang/lib/Headers/hlsl/hlsl_detail.h          |  23 --
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      |  29 ++-
 clang/lib/Sema/SemaHLSL.cpp                   |  18 ++
 clang/test/CodeGenHLSL/builtins/length.hlsl   | 203 +++++++-----------
 .../test/SemaHLSL/BuiltIns/length-errors.hlsl |  51 ++---
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   1 +
 .../Target/DirectX/DXILIntrinsicExpansion.cpp |  30 +++
 llvm/test/CodeGen/DirectX/length.ll           | 116 ++++++++++
 llvm/test/CodeGen/DirectX/length_error.ll     |  10 +
 .../DirectX/length_invalid_intrinsic_error.ll |  10 +
 .../length_invalid_intrinsic_error_scalar.ll  |  10 +
 14 files changed, 324 insertions(+), 198 deletions(-)
 create mode 100644 llvm/test/CodeGen/DirectX/length.ll
 create mode 100644 llvm/test/CodeGen/DirectX/length_error.ll
 create mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
 create mode 100644 llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index f4216bd01a074..468c16050e2bf 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4865,6 +4865,12 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLLength : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_length"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_lerp"];
   let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2b09197669996..ca03fb665d423 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19334,6 +19334,20 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
         ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
   }
+  case Builtin::BI__builtin_hlsl_length: {
+    Value *X = EmitScalarExpr(E->getArg(0));
+
+    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+           "length operand must have a float representation");
+    // if the operand is a scalar, we can use the fabs llvm intrinsic directly
+    if (!E->getArg(0)->getType()->isVectorType())
+      return EmitFAbs(*this, X);
+
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/X->getType()->getScalarType(),
+        CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
+        nullptr, "hlsl.length");
+  }
   case Builtin::BI__builtin_hlsl_normalize: {
     Value *X = EmitScalarExpr(E->getArg(0));
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 00e110e8e6fa2..46e472f0aae21 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -77,6 +77,7 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 392075d276b18..8d5fd94133153 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -13,14 +13,6 @@ namespace hlsl {
 
 namespace __detail {
 
-template <typename T, typename U> struct is_same {
-  static const bool value = false;
-};
-
-template <typename T> struct is_same<T, T> {
-  static const bool value = true;
-};
-
 template <bool B, typename T> struct enable_if {};
 
 template <typename T> struct enable_if<true, T> {
@@ -41,21 +33,6 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
   return __builtin_bit_cast(U, F);
 }
 
-template <typename T>
-constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
-length_impl(T X) {
-  return __builtin_elementwise_abs(X);
-}
-
-template <typename T, int N>
-enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
-length_vec_impl(vector<T, N> X) {
-  vector<T, N> XSquared = X * X;
-  T XSquaredSum = XSquared[0];
-  [unroll] for (int i = 1; i < N; ++i) XSquaredSum += XSquared[i];
-  return __builtin_elementwise_sqrt(XSquaredSum);
-}
-
 } // namespace __detail
 } // namespace hlsl
 #endif //_HLSL_HLSL_DETAILS_H_
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index cf287e598f76b..b745997f1d5a2 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1297,16 +1297,27 @@ float4 lerp(float4, float4, float4);
 ///
 /// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...).
 
-const inline half length(half X) { return __detail::length_impl(X); }
-const inline float length(float X) { return __detail::length_impl(X); }
-
-template <int N> const inline half length(vector<half, N> X) {
-  return __detail::length_vec_impl(X);
-}
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
+half length(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
+half length(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
+half length(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
+half length(half4);
 
-template <int N> const inline float length(vector<float, N> X) {
-  return __detail::length_vec_impl(X);
-}
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
+float length(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
+float length(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
+float length(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
+float length(float4);
 
 //===----------------------------------------------------------------------===//
 // log builtins
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 65ddee05a2151..83c38fad2df68 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2112,6 +2112,24 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
+  case Builtin::BI__builtin_hlsl_length: {
+    if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
+      return true;
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+
+    ExprResult A = TheCall->getArg(0);
+    QualType ArgTyA = A.get()->getType();
+    QualType RetTy;
+
+    if (auto *VTy = ArgTyA->getAs<VectorType>())
+      RetTy = VTy->getElementType();
+    else
+      RetTy = TheCall->getArg(0)->getType();
+
+    TheCall->setType(RetTy);
+    break;
+  }
   case Builtin::BI__builtin_hlsl_mad: {
     if (SemaRef.checkArgCount(TheCall, 3))
       return true;
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index ecf2edcf0b05f..a24f01d275440 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -1,130 +1,73 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -finclude-default-header -triple \
-// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN: -emit-llvm -O1 -o - | FileCheck %s
-
-// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z16test_length_halfDh(
-// CHECK-SAME: half noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.fabs.f16(half [[P0]])
-// CHECK-NEXT:    ret half [[ELT_ABS_I]]
-//
-half test_length_half(half p0)
-{
-  return length(p0);
-}
-
-// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh(
-// CHECK-SAME: <2 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x half> [[P0]], [[P0]]
-// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
-// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
-// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT_I]], [[VECEXT1_I]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I]])
-// CHECK-NEXT:    ret half [[TMP0]]
-//
-half test_length_half2(half2 p0)
-{
-  return length(p0);
-}
-
-// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh(
-// CHECK-SAME: <3 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x half> [[P0]], [[P0]]
-// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
-// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
-// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]]
-// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
-// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_1]])
-// CHECK-NEXT:    ret half [[TMP0]]
-//
-half test_length_half3(half3 p0)
-{
-  return length(p0);
-}
-
-// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh(
-// CHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x half> [[P0]], [[P0]]
-// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
-// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
-// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]]
-// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
-// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]]
-// CHECK-NEXT:    [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
-// CHECK-NEXT:    [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_2]], [[ADD_I_1]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_2]])
-// CHECK-NEXT:    ret half [[TMP0]]
-//
-half test_length_half4(half4 p0)
-{
-  return length(p0);
-}
-
-
-// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z17test_length_floatf(
-// CHECK-SAME: float noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.fabs.f32(float [[P0]])
-// CHECK-NEXT:    ret float [[ELT_ABS_I]]
-//
-float test_length_float(float p0)
-{
-  return length(p0);
-}
-
-// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f(
-// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x float> [[P0]], [[P0]]
-// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
-// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
-// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT_I]], [[VECEXT1_I]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I]])
-// CHECK-NEXT:    ret float [[TMP0]]
-//
-float test_length_float2(float2 p0)
-{
-  return length(p0);
-}
-
-// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f(
-// CHECK-SAME: <3 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x float> [[P0]], [[P0]]
-// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
-// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
-// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]]
-// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
-// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_1]])
-// CHECK-NEXT:    ret float [[TMP0]]
-//
-float test_length_float3(float3 p0)
-{
-  return length(p0);
-}
-
-
-// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f(
-// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x float> [[P0]], [[P0]]
-// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
-// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
-// CHECK-NEXT:    [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]]
-// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
-// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]]
-// CHECK-NEXT:    [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
-// CHECK-NEXT:    [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_2]], [[ADD_I_1]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_2]])
-// CHECK-NEXT:    ret float [[TMP0]]
-//
-float test_length_float4(float4 p0)
-{
-  return length(p0);
-}
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+// NATIVE_HALF: define noundef nofpclass(nan inf) half @
+// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.fabs.f16(half
+// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float
+// NATIVE_HALF: ret half
+// NO_HALF: ret float
+half test_length_half(half p0)
+{
+  return length(p0);
+}
+// NATIVE_HALF: define noundef nofpclass(nan inf) half @
+// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v2f16
+// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32(
+// NATIVE_HALF: ret half %hlsl.length
+// NO_HALF: ret float %hlsl.length
+half test_length_half2(half2 p0)
+{
+  return length(p0);
+}
+// NATIVE_HALF: define noundef nofpclass(nan inf) half @
+// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v3f16
+// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32(
+// NATIVE_HALF: ret half %hlsl.length
+// NO_HALF: ret float %hlsl.length
+half test_length_half3(half3 p0)
+{
+  return length(p0);
+}
+// NATIVE_HALF: define noundef nofpclass(nan inf) half @
+// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v4f16
+// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32(
+// NATIVE_HALF: ret half %hlsl.length
+// NO_HALF: ret float %hlsl.length
+half test_length_half4(half4 p0)
+{
+  return length(p0);
+}
+
+// CHECK: define noundef nofpclass(nan inf) float @
+// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float
+// CHECK: ret float
+float test_length_float(float p0)
+{
+  return length(p0);
+}
+// CHECK: define noundef nofpclass(nan inf) float @
+// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32(
+// CHECK: ret float %hlsl.length
+float test_length_float2(float2 p0)
+{
+  return length(p0);
+}
+// CHECK: define noundef nofpclass(nan inf) float @
+// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32(
+// CHECK: ret float %hlsl.length
+float test_length_float3(float3 p0)
+{
+  return length(p0);
+}
+// CHECK: define noundef nofpclass(nan inf) float @
+// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32(
+// CHECK: ret float %hlsl.length
+float test_length_float4(float4 p0)
+{
+  return length(p0);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
index a191f0419fbba..281faada6f5e9 100644
--- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
@@ -1,53 +1,32 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
+
 
 void test_too_few_arg()
 {
-  return length();
-  // expected-error@-1 {{no matching function for call to 'length'}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}}
+  return __builtin_hlsl_length();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
 }
 
 void test_too_many_arg(float2 p0)
 {
-  return length(p0, p0);
-  // expected-error@-1 {{no matching function for call to 'length'}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}}
-}
-
-float double_to_float_type(double p0) {
-  return length(p0);
-  // expected-error@-1  {{call to 'length' is ambiguous}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  return __builtin_hlsl_length(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
 }
 
-
-float bool_to_float_type_promotion(bool p1)
+bool builtin_bool_to_float_type_promotion(bool p1)
 {
-  return length(p1);
-  // expected-error@-1  {{call to 'length' is ambiguous}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  return __builtin_hlsl_length(p1);
+  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
 }
 
-float length_int_to_float_promotion(int p1)
+bool builtin_length_int_to_float_promotion(int p1)
 {
-  return length(p1);
-  // expected-error@-1  {{call to 'length' is ambiguous}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  return __builtin_hlsl_length(p1);
+  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
 }
 
-float2 length_int2_to_float2_promotion(int2 p1)
+bool2 builtin_length_int2_to_float2_promotion(int2 p1)
 {
-  return length(p1);
-  // expected-error@-1  {{call to 'length' is ambiguous}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
-  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  return __builtin_hlsl_length(p1);
+  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index ef48af5b42dbf..3b1d1a88e01a8 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -93,6 +93,7 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
 def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
     [IntrNoMem]>;
 
+def int_dx_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
 def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index cf142806bb1df..51dc3025f0c37 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -57,6 +57,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_nclamp:
   case Intrinsic::dx_degrees:
   case Intrinsic::dx_lerp:
+  case Intrinsic::dx_length:
   case Intrinsic::dx_normalize:
   case Intrinsic::dx_fdot:
   case Intrinsic::dx_sdot:
@@ -291,6 +292,32 @@ static Value *expandAnyOrAllIntrinsic(CallInst *Orig,
   return Result;
 }
 
+static Value *expandLengthIntrinsic(CallInst *Orig) {
+  Value *X = Orig->getOperand(0);
+  IRBuilder<> Builder(Orig);
+  Type *Ty = X->getType();
+  Type *EltTy = Ty->getScalarType();
+
+  // Though dx.length does work on scalar type, we can optimize it to just emit
+  // fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because
+  // CGBuiltin.cpp should have emitted a fabs call.
+  Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0);
+  auto *XVec = dyn_cast<FixedVectorType>(Ty);
+  unsigned XVecSize = XVec->getNumElements();
+  if (!(Ty->isVectorTy() && XVecSize > 1))
+    report_fatal_error(Twine("Invalid input type for length intrinsic"),
+                       /* gen_crash_diag=*/false);
+
+  Value *Sum = Builder.CreateFMul(Elt, Elt);
+  for (unsigned I = 1; I < XVecSize; I++) {
+    Elt = Builder.CreateExtractElement(X, I);
+    Value *Mul = Builder.CreateFMul(Elt, Elt);
+    Sum = Builder.CreateFAdd(Sum, Mul);
+  }
+  return Builder.CreateIntrinsic(EltTy, Intrinsic::sqrt, ArrayRef<Value *>{Sum},
+                                 nullptr, "elt.sqrt");
+}
+
 static Value *expandLerpIntrinsic(CallInst *Orig) {
   Value *X = Orig->getOperand(0);
   Value *Y = Orig->getOperand(1);
@@ -562,6 +589,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_lerp:
     Result = expandLerpIntrinsic(Orig);
     break;
+  case Intrinsic::dx_length:
+    Result = expandLengthIntrinsic(Orig);
+    break;
   case Intrinsic::dx_normalize:
     Result = expandNormalizeIntrinsic(Orig);
     break;
diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll
new file mode 100644
index 0000000000000..fc5868a7f6e82
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/length.ll
@@ -0,0 +1,116 @@
+; RUN: opt -S  -dxil-intrinsic-expansion  < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
+; RUN: opt -S  -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
+
+; Make sure dxil operation function calls for length are generated for half/float.
+
+declare half @llvm.fabs.f16(half)
+declare half @llvm.dx.length.v2f16(<2 x half>)
+declare half @llvm.dx.length.v3f16(<3 x half>)
+declare half @llvm.dx.length.v4f16(<4 x half>)
+
+declare float @llvm.fabs.f32(float)
+declare float @llvm.dx.length.v2f32(<2 x float>)
+declare float @llvm.dx.length.v3f32(<3 x float>)
+declare float @llvm.dx.length.v4f32(<4 x float>)
+
+define noundef half @test_length_half2(<2 x half> noundef %p0) {
+entry:
+  ; CHECK: extractelement <2 x half> %{{.*}}, i64 0
+  ; CHECK: fmul half %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <2 x half> %{{.*}}, i64 1
+  ; CHECK: fmul half %{{.*}}, %{{.*}}
+  ; CHECK: fadd half %{{.*}}, %{{.*}}
+  ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
+  ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
+
+  %hlsl.length = call half @llvm.dx.length.v2f16(<2 x half> %p0)
+  ret half %hlsl.length
+}
+
+define noundef half @test_length_half3(<3 x half> noundef %p0) {
+entry:
+  ; CHECK: extractelement <3 x half> %{{.*}}, i64 0
+  ; CHECK: fmul half %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <3 x half> %{{.*}}, i64 1
+  ; CHECK: fmul half %{{.*}}, %{{.*}}
+  ; CHECK: fadd half %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <3 x half> %{{.*}}, i64 2
+  ; CHECK: fmul half %{{.*}}, %{{.*}}
+  ; CHECK: fadd half %{{.*}}, %{{.*}}
+  ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
+  ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}})
+
+  %hlsl.length = call half @llvm.dx.length.v3f16(<3 x half> %p0)
+  ret half %hlsl.length
+}
+
+define noundef half @test_length_half4(<4 x half> noundef %p0) {
+entry:
+  ; CHECK: extractelement <4 x half> %{{.*}}, i64 0
+  ; CHECK: fmul half %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <4 x half> %{{.*}}, i64 1
+  ; CHECK: fmul half %{{.*}}, %{{.*}}
+  ; CHECK: fadd half %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <4 x half> %{{.*}}, i64 2
+  ; CHECK: fmul half %{{.*}}, %{{.*}}
+  ; CHECK: fadd half %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <4 x half> %{{.*}}, i64 3
+  ; CHECK: fmul half %{{.*}}, %{{.*}}
+  ; CHECK: fadd half %{{.*}}, %{{.*}}
+  ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}})
+  ; DOPCHECK:  call half @dx.op.unary.f16(i32 24, half %{{.*}})
+
+  %hlsl.length = call half @llvm.dx.length.v4f16(<4 x half> %p0)
+  ret half %hlsl.length
+}
+
+define noundef float @test_length_float2(<2 x float> noundef %p0) {
+entry:
+  ; CHECK: extractelement <2 x float> %{{.*}}, i64 0
+  ; CHECK: fmul float %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <2 x float> %{{.*}}, i64 1
+  ; CHECK: fmul float %{{.*}}, %{{.*}}
+  ; CHECK: fadd float %{{.*}}, %{{.*}}
+  ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
+  ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
+
+  %hlsl.length = call float @llvm.dx.length.v2f32(<2 x float> %p0)
+  ret float %hlsl.length
+}
+
+define noundef float @test_length_float3(<3 x float> noundef %p0) {
+entry:
+  ; CHECK: extractelement <3 x float> %{{.*}}, i64 0
+  ; CHECK: fmul float %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <3 x float> %{{.*}}, i64 1
+  ; CHECK: fmul float %{{.*}}, %{{.*}}
+  ; CHECK: fadd float %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <3 x float> %{{.*}}, i64 2
+  ; CHECK: fmul float %{{.*}}, %{{.*}}
+  ; CHECK: fadd float %{{.*}}, %{{.*}}
+  ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
+  ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}})
+
+  %hlsl.length = call float @llvm.dx.length.v3f32(<3 x float> %p0)
+  ret float %hlsl.length
+}
+
+define noundef float @test_length_float4(<4 x float> noundef %p0) {
+entry:
+  ; CHECK: extractelement <4 x float> %{{.*}}, i64 0
+  ; CHECK: fmul float %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <4 x float> %{{.*}}, i64 1
+  ; CHECK: fmul float %{{.*}}, %{{.*}}
+  ; CHECK: fadd float %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <4 x float> %{{.*}}, i64 2
+  ; CHECK: fmul float %{{.*}}, %{{.*}}
+  ; CHECK: fadd float %{{.*}}, %{{.*}}
+  ; CHECK: extractelement <4 x float> %{{.*}}, i64 3
+  ; CHECK: fmul float %{{.*}}, %{{.*}}
+  ; CHECK: fadd float %{{.*}}, %{{.*}}
+  ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}})
+  ; DOPCHECK:  call float @dx.op.unary.f32(i32 24, float %{{.*}})
+
+  %hlsl.length = call float @llvm.dx.length.v4f32(<4 x float> %p0)
+  ret float %hlsl.length
+}
diff --git a/llvm/test/CodeGen/DirectX/length_error.ll b/llvm/test/CodeGen/DirectX/length_error.ll
new file mode 100644
index 0000000000000..143b41fc506e1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/length_error.ll
@@ -0,0 +1,10 @@
+; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation length does not support double overload type
+; CHECK: Cannot create Sqrt operation: Invalid overload type
+
+define noundef double @test_length_double2(<2 x double> noundef %p0) {
+entry:
+  %hlsl.length = call double @llvm.dx.length.v2f32(<2 x double> %p0)
+  ret double %hlsl.length
+}
diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
new file mode 100644
index 0000000000000..f722de2f9029e
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error.ll
@@ -0,0 +1,10 @@
+; RUN: not opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation length does not support 1-element vector types.
+; CHECK: LLVM ERROR: Invalid input type for length intrinsic
+
+define noundef float @test_length_float(<1 x float> noundef %p0) {
+entry:
+  %hlsl.length = call float @llvm.dx.length.v1f32(<1 x float> %p0)
+  ret float %hlsl.length
+}
diff --git a/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll
new file mode 100644
index 0000000000000..ac3a0513eb6b2
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/length_invalid_intrinsic_error_scalar.ll
@@ -0,0 +1,10 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation length does not support scalar types
+; CHECK: error: invalid intrinsic signature
+
+define noundef float @test_length_float(float noundef %p0) {
+entry:
+  %hlsl.length = call float @llvm.dx.length.f32(float %p0)
+  ret float %hlsl.length
+}

From 26aa20a3dd82e2ff5855bee04b22b35f6b1f026f Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Thu, 9 Jan 2025 11:21:03 -0800
Subject: [PATCH 39/79] Use range-based for to iterate over fields in record
 layout, NFCI (#122029)

Move the common case of FieldDecl::getFieldIndex() inline to mitigate
the cost of removing the extra `FieldNo` induction variable.

Also rename isNoUniqueAddress parameter to isNonVirtualBaseType, which
appears to be more accurate. I think the current name is just a
consequence of autocomplete gone wrong.
---
 clang/include/clang/AST/Decl.h              | 14 +++-
 clang/lib/AST/Decl.cpp                      | 10 +--
 clang/lib/AST/RecordLayoutBuilder.cpp       | 81 +++++++++------------
 clang/lib/CodeGen/CGRecordLayoutBuilder.cpp |  6 +-
 4 files changed, 52 insertions(+), 59 deletions(-)

diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 67ee0bb412692..16fc98aa1a57f 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -3115,8 +3115,20 @@ class FieldDecl : public DeclaratorDecl, public Mergeable<FieldDecl> {
 
   /// Returns the index of this field within its record,
   /// as appropriate for passing to ASTRecordLayout::getFieldOffset.
-  unsigned getFieldIndex() const;
+  unsigned getFieldIndex() const {
+    const FieldDecl *Canonical = getCanonicalDecl();
+    if (Canonical->CachedFieldIndex == 0) {
+      Canonical->setCachedFieldIndex();
+      assert(Canonical->CachedFieldIndex != 0);
+    }
+    return Canonical->CachedFieldIndex - 1;
+  }
 
+private:
+  /// Set CachedFieldIndex to the index of this field plus one.
+  void setCachedFieldIndex() const;
+
+public:
   /// Determines whether this field is mutable (C++ only).
   bool isMutable() const { return Mutable; }
 
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 741e908cf9bc5..97e23dd1aaa92 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -4651,12 +4651,9 @@ bool FieldDecl::isPotentiallyOverlapping() const {
   return hasAttr<NoUniqueAddressAttr>() && getType()->getAsCXXRecordDecl();
 }
 
-unsigned FieldDecl::getFieldIndex() const {
-  const FieldDecl *Canonical = getCanonicalDecl();
-  if (Canonical != this)
-    return Canonical->getFieldIndex();
-
-  if (CachedFieldIndex) return CachedFieldIndex - 1;
+void FieldDecl::setCachedFieldIndex() const {
+  assert(this == getCanonicalDecl() &&
+         "should be called on the canonical decl");
 
   unsigned Index = 0;
   const RecordDecl *RD = getParent()->getDefinition();
@@ -4670,7 +4667,6 @@ unsigned FieldDecl::getFieldIndex() const {
   }
 
   assert(CachedFieldIndex && "failed to find field in parent");
-  return CachedFieldIndex - 1;
 }
 
 SourceRange FieldDecl::getSourceRange() const {
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index f749d3a705fc9..4493dd00d26d8 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -138,9 +138,9 @@ class EmptySubobjectMap {
     return Offset <= MaxEmptyClassOffset;
   }
 
-  CharUnits
-  getFieldOffset(const ASTRecordLayout &Layout, unsigned FieldNo) const {
-    uint64_t FieldOffset = Layout.getFieldOffset(FieldNo);
+  CharUnits getFieldOffset(const ASTRecordLayout &Layout,
+                           const FieldDecl *Field) const {
+    uint64_t FieldOffset = Layout.getFieldOffset(Field->getFieldIndex());
     assert(FieldOffset % CharWidth == 0 &&
            "Field offset not at char boundary!");
 
@@ -298,14 +298,12 @@ EmptySubobjectMap::CanPlaceBaseSubobjectAtOffset(const BaseSubobjectInfo *Info,
   }
 
   // Traverse all member variables.
-  unsigned FieldNo = 0;
-  for (CXXRecordDecl::field_iterator I = Info->Class->field_begin(),
-       E = Info->Class->field_end(); I != E; ++I, ++FieldNo) {
-    if (I->isBitField())
+  for (const FieldDecl *Field : Info->Class->fields()) {
+    if (Field->isBitField())
       continue;
 
-    CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo);
-    if (!CanPlaceFieldSubobjectAtOffset(*I, FieldOffset))
+    CharUnits FieldOffset = Offset + getFieldOffset(Layout, Field);
+    if (!CanPlaceFieldSubobjectAtOffset(Field, FieldOffset))
       return false;
   }
 
@@ -345,14 +343,12 @@ void EmptySubobjectMap::UpdateEmptyBaseSubobjects(const BaseSubobjectInfo *Info,
   }
 
   // Traverse all member variables.
-  unsigned FieldNo = 0;
-  for (CXXRecordDecl::field_iterator I = Info->Class->field_begin(),
-       E = Info->Class->field_end(); I != E; ++I, ++FieldNo) {
-    if (I->isBitField())
+  for (const FieldDecl *Field : Info->Class->fields()) {
+    if (Field->isBitField())
       continue;
 
-    CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo);
-    UpdateEmptyFieldSubobjects(*I, FieldOffset, PlacingEmptyBase);
+    CharUnits FieldOffset = Offset + getFieldOffset(Layout, Field);
+    UpdateEmptyFieldSubobjects(Field, FieldOffset, PlacingEmptyBase);
   }
 }
 
@@ -410,15 +406,12 @@ EmptySubobjectMap::CanPlaceFieldSubobjectAtOffset(const CXXRecordDecl *RD,
   }
 
   // Traverse all member variables.
-  unsigned FieldNo = 0;
-  for (CXXRecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end();
-       I != E; ++I, ++FieldNo) {
-    if (I->isBitField())
+  for (const FieldDecl *Field : RD->fields()) {
+    if (Field->isBitField())
       continue;
 
-    CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo);
-
-    if (!CanPlaceFieldSubobjectAtOffset(*I, FieldOffset))
+    CharUnits FieldOffset = Offset + getFieldOffset(Layout, Field);
+    if (!CanPlaceFieldSubobjectAtOffset(Field, FieldOffset))
       return false;
   }
 
@@ -465,9 +458,8 @@ EmptySubobjectMap::CanPlaceFieldSubobjectAtOffset(const FieldDecl *FD,
   return true;
 }
 
-bool
-EmptySubobjectMap::CanPlaceFieldAtOffset(const FieldDecl *FD,
-                                         CharUnits Offset) {
+bool EmptySubobjectMap::CanPlaceFieldAtOffset(const FieldDecl *FD,
+                                              CharUnits Offset) {
   if (!CanPlaceFieldSubobjectAtOffset(FD, Offset))
     return false;
 
@@ -521,15 +513,12 @@ void EmptySubobjectMap::UpdateEmptyFieldSubobjects(
   }
 
   // Traverse all member variables.
-  unsigned FieldNo = 0;
-  for (CXXRecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end();
-       I != E; ++I, ++FieldNo) {
-    if (I->isBitField())
+  for (const FieldDecl *Field : RD->fields()) {
+    if (Field->isBitField())
       continue;
 
-    CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo);
-
-    UpdateEmptyFieldSubobjects(*I, FieldOffset, PlacingOverlappingField);
+    CharUnits FieldOffset = Offset + getFieldOffset(Layout, Field);
+    UpdateEmptyFieldSubobjects(Field, FieldOffset, PlacingOverlappingField);
   }
 }
 
@@ -1455,10 +1444,8 @@ void ItaniumRecordLayoutBuilder::LayoutFields(const RecordDecl *D) {
   bool InsertExtraPadding = D->mayInsertExtraPadding(/*EmitRemark=*/true);
   bool HasFlexibleArrayMember = D->hasFlexibleArrayMember();
   for (auto I = D->field_begin(), End = D->field_end(); I != End; ++I) {
-    auto Next(I);
-    ++Next;
-    LayoutField(*I,
-                InsertExtraPadding && (Next != End || !HasFlexibleArrayMember));
+    LayoutField(*I, InsertExtraPadding &&
+                        (std::next(I) != End || !HasFlexibleArrayMember));
   }
 }
 
@@ -3672,35 +3659,33 @@ static void DumpRecordLayout(raw_ostream &OS, const RecordDecl *RD,
   }
 
   // Dump fields.
-  uint64_t FieldNo = 0;
-  for (RecordDecl::field_iterator I = RD->field_begin(),
-         E = RD->field_end(); I != E; ++I, ++FieldNo) {
-    const FieldDecl &Field = **I;
-    uint64_t LocalFieldOffsetInBits = Layout.getFieldOffset(FieldNo);
+  for (const FieldDecl *Field : RD->fields()) {
+    uint64_t LocalFieldOffsetInBits =
+        Layout.getFieldOffset(Field->getFieldIndex());
     CharUnits FieldOffset =
       Offset + C.toCharUnitsFromBits(LocalFieldOffsetInBits);
 
     // Recursively dump fields of record type.
-    if (auto RT = Field.getType()->getAs<RecordType>()) {
+    if (auto RT = Field->getType()->getAs<RecordType>()) {
       DumpRecordLayout(OS, RT->getDecl(), C, FieldOffset, IndentLevel,
-                       Field.getName().data(),
+                       Field->getName().data(),
                        /*PrintSizeInfo=*/false,
                        /*IncludeVirtualBases=*/true);
       continue;
     }
 
-    if (Field.isBitField()) {
+    if (Field->isBitField()) {
       uint64_t LocalFieldByteOffsetInBits = C.toBits(FieldOffset - Offset);
       unsigned Begin = LocalFieldOffsetInBits - LocalFieldByteOffsetInBits;
-      unsigned Width = Field.getBitWidthValue(C);
+      unsigned Width = Field->getBitWidthValue(C);
       PrintBitFieldOffset(OS, FieldOffset, Begin, Width, IndentLevel);
     } else {
       PrintOffset(OS, FieldOffset, IndentLevel);
     }
     const QualType &FieldType = C.getLangOpts().DumpRecordLayoutsCanonical
-                                    ? Field.getType().getCanonicalType()
-                                    : Field.getType();
-    OS << FieldType << ' ' << Field << '\n';
+                                    ? Field->getType().getCanonicalType()
+                                    : Field->getType();
+    OS << FieldType << ' ' << *Field << '\n';
   }
 
   // Dump virtual bases.
diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
index ea44e6f21f3c8..209ef236f0d5d 100644
--- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -182,7 +182,7 @@ struct CGRecordLowering {
                        llvm::Type *StorageType);
   /// Lowers an ASTRecordLayout to a llvm type.
   void lower(bool NonVirtualBaseType);
-  void lowerUnion(bool isNoUniqueAddress);
+  void lowerUnion(bool isNonVirtualBaseType);
   void accumulateFields(bool isNonVirtualBaseType);
   RecordDecl::field_iterator
   accumulateBitFields(bool isNonVirtualBaseType,
@@ -310,9 +310,9 @@ void CGRecordLowering::lower(bool NVBaseType) {
   computeVolatileBitfields();
 }
 
-void CGRecordLowering::lowerUnion(bool isNoUniqueAddress) {
+void CGRecordLowering::lowerUnion(bool isNonVirtualBaseType) {
   CharUnits LayoutSize =
-      isNoUniqueAddress ? Layout.getDataSize() : Layout.getSize();
+      isNonVirtualBaseType ? Layout.getDataSize() : Layout.getSize();
   llvm::Type *StorageType = nullptr;
   bool SeenNamedMember = false;
   // Iterate through the fields setting bitFieldInfo and the Fields array. Also

From 5ff36748cfeee1d02da6512ad578e4014724f67e Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Thu, 9 Jan 2025 10:39:38 -0800
Subject: [PATCH 40/79] [SLP]Fix mask processing for reused gathered scalars

Need to sync the mask between cost and actual emission to avoid bugs in
mask calculation

Fixes #122324
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 14 +++++-
 .../X86/shuffle-mask-emission.ll              | 43 +++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 36fed8937aec2..48ed612c11b36 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10973,7 +10973,19 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       }
     }
 
-    ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true);
+    if (!ExtMask.empty()) {
+      if (CommonMask.empty()) {
+        CommonMask.assign(ExtMask.begin(), ExtMask.end());
+      } else {
+        SmallVector<int> NewMask(ExtMask.size(), PoisonMaskElem);
+        for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) {
+          if (ExtMask[I] == PoisonMaskElem)
+            continue;
+          NewMask[I] = CommonMask[ExtMask[I]];
+        }
+        CommonMask.swap(NewMask);
+      }
+    }
     if (CommonMask.empty()) {
       assert(InVectors.size() == 1 && "Expected only one vector with no mask");
       return Cost;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
new file mode 100644
index 0000000000000..fcc295de62adf
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i1 @test() {
+; CHECK-LABEL: define i1 @test() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[H_PROMOTED118_I_FR:%.*]] = freeze i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[H_PROMOTED118_I_FR]], i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> zeroinitializer, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP3]], <4 x i32> <i32 2, i32 2, i32 7, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 0, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i32> [[TMP6]], <i32 1, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP8]], false
+; CHECK-NEXT:    ret i1 [[OP_RDX]]
+;
+entry:
+  %h.promoted118.i.fr = freeze i32 1
+  %invariant.op.i51 = add i32 %h.promoted118.i.fr, 0
+  %conv25.i = xor i32 0, 0
+  %add.i.i = add i32 %conv25.i, %h.promoted118.i.fr
+  %sext.i.mask = and i32 %add.i.i, 0
+  %cmp27.i = icmp eq i32 %sext.i.mask, 1
+  %0 = or i1 %cmp27.i, false
+  %conv25.i.1 = add i32 0, 0
+  %add.i.i.1 = add i32 %conv25.i.1, %h.promoted118.i.fr
+  %sext.i.1.mask = and i32 %add.i.i.1, 1
+  %cmp27.i.1 = icmp eq i32 %sext.i.1.mask, 0
+  %conv25.1.i.1 = xor i32 0, 0
+  %add.i.1.i.1 = add i32 %conv25.1.i.1, %h.promoted118.i.fr
+  %sext.1.i.1.mask = and i32 %add.i.1.i.1, 1
+  %cmp27.1.i.1 = icmp eq i32 %sext.1.i.1.mask, 0
+  %add.i.2.reass.i.1 = add i32 %invariant.op.i51, %conv25.i.1
+  %sext.2.i.1.mask = and i32 %add.i.2.reass.i.1, 1
+  %cmp27.2.i.1 = icmp eq i32 %sext.2.i.1.mask, 0
+  %1 = or i1 %cmp27.1.i.1, %cmp27.2.i.1
+  %2 = or i1 %cmp27.i.1, %1
+  %3 = or i1 %0, %2
+  ret i1 %3
+}

From 1842a3d833d934793012c717e98b10d51193fd0d Mon Sep 17 00:00:00 2001
From: Paul Bowen-Huggett <paulhuggett@mac.com>
Date: Thu, 9 Jan 2025 20:26:07 +0100
Subject: [PATCH 41/79] Fix a cmake error when using the Xcode generator.
 (#119403)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I’m seeing a series of errors when trying to run the cmake configure
step on macOS when the cmake generator is set to Xcode. All is well if I
use the Ninja or Unix Makefile generators. Messages are all of the form:
~~~
CMake Error at …llvm-project/clang/cmake/modules/AddClang.cmake:120
(target_compile_definitions):
  Cannot specify compile definitions for target "obj.clangBasic" which
  is not built by this project.
Call Stack (most recent call first):
  …llvm-project/clang/lib/Basic/CMakeLists.txt:57 (add_clang_library)
~~~
The remaining errors are similar but mention targets obj.clangAPINotes,
obj.clangLex, obj.clangParse, and so on.

The regression appears to have been introduced by commit 09fa2f012fcc
(Oct 14 2024) which added the code in this area.

My proposed solution is simply to add a test to ensure that the obj.x
target exists before setting its compile definitions. There is precedent
doing just this in both clang/cmake/modules/AddClang.cmake and
clang/lib/support/CMakeLists.txt as well as in the “MSVC AND NOT
CLANG_LINK_CLANG_DYLIB” path immediately above the offending line.

I’ve also made a couple of grammatical tweaks in the comments
surrounding this code.

In case it's relevant, the cmake settings and definitions I've used to
trigger these errors is:
~~~bash
GENERATOR="Xcode"
OUTDIR=build_macos
cmake \
-S "$SCRIPT_DIR/llvm" \
-B "$SCRIPT_DIR/$OUTDIR" \
-G "$GENERATOR" \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_OSX_ARCHITECTURES=arm64 \
-D LLVM_PARALLEL_LINK_JOBS=1 \
-D LLVM_ENABLE_PROJECTS="clang;lld" \
-D LLVM_TARGETS_TO_BUILD=RISCV \
-D LLVM_DEFAULT_TARGET_TRIPLE=riscv32-unknown-elf \
-D LLVM_OPTIMIZED_TABLEGEN=Yes
~~~
(cmake v3.31.1, Xcode 16.1. I know that not all of these variables are
useful for the Xcode generator!)

Co-authored-by: Paul Bowen-Huggett <phuggett@keysom.io>
---
 clang/cmake/modules/AddClang.cmake | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/cmake/modules/AddClang.cmake b/clang/cmake/modules/AddClang.cmake
index 091aec98e93ca..cdc8bd5cd503b 100644
--- a/clang/cmake/modules/AddClang.cmake
+++ b/clang/cmake/modules/AddClang.cmake
@@ -109,13 +109,14 @@ macro(add_clang_library name)
   llvm_add_library(${name} ${LIBTYPE} ${ARG_UNPARSED_ARGUMENTS} ${srcs})
 
   if(MSVC AND NOT CLANG_LINK_CLANG_DYLIB)
-    # Make sure all consumers also turn off visibility macros so there not trying to dllimport symbols.
+    # Make sure all consumers also turn off visibility macros so they're not
+    # trying to dllimport symbols.
     target_compile_definitions(${name} PUBLIC CLANG_BUILD_STATIC)
     if(TARGET "obj.${name}")
       target_compile_definitions("obj.${name}" PUBLIC CLANG_BUILD_STATIC)
     endif()
-  elseif(NOT ARG_SHARED AND NOT ARG_STATIC)
-    # Clang component libraries linked in to clang-cpp are declared without SHARED or STATIC
+  elseif(TARGET "obj.${name}" AND NOT ARG_SHARED AND NOT ARG_STATIC)
+    # Clang component libraries linked to clang-cpp are declared without SHARED or STATIC
     target_compile_definitions("obj.${name}" PUBLIC CLANG_EXPORTS)
   endif()
 

From 2c6ed5f7eec30c22e136c03777eda9a8c4c0c79b Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Thu, 9 Jan 2025 14:26:19 -0500
Subject: [PATCH 42/79] [libc++][NFC] Remove trailing whitespace from release
 notes

---
 libcxx/docs/ReleaseNotes/20.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index 793b172a24af0..9a520d8452b06 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -125,9 +125,9 @@ Deprecations and Removals
   supported as an extension anymore, please migrate any code that uses e.g. ``std::vector<const T>`` to be
   standards conforming.
 
-- Non-conforming member typedefs ``base``, ``iterator``, ``const_iterator``, and ``const_reference`` of ``std::bitset``, 
+- Non-conforming member typedefs ``base``, ``iterator``, ``const_iterator``, and ``const_reference`` of ``std::bitset``,
   and member typedef ``base`` of ``std::forward_list`` and ``std::list`` are removed. Previously, these member typedefs
-  (except ``const_reference``) were private but could cause ambiguity in name lookup. Code that expects such ambiguity  
+  (except ``const_reference``) were private but could cause ambiguity in name lookup. Code that expects such ambiguity
   will possibly not compile in LLVM 20.
 
 - The function ``__libcpp_verbose_abort()`` is now ``noexcept``, to match ``std::terminate()``. (The combination of

From b16777afb0c1799bb3eaa63c66213180c99c9d25 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 9 Jan 2025 11:36:09 -0800
Subject: [PATCH 43/79] [RISCV] Return MILog2SEW for mask instructions
 getOperandLog2EEW. NFC (#122332)

The SEW operand for these instructions should have a value of 0. This
matches what was done for vcpop/vfirst.
---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 6661921d66f95..89684decde6c1 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -637,7 +637,7 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VMSBF_M:
   case RISCV::VMSIF_M:
   case RISCV::VMSOF_M: {
-    return 0;
+    return MILog2SEW;
   }
 
   // Vector Iota Instruction

From 876841b0e2aea4ae8dac58842242e311b8aef432 Mon Sep 17 00:00:00 2001
From: Heejin Ahn <aheejin@gmail.com>
Date: Thu, 9 Jan 2025 11:37:40 -0800
Subject: [PATCH 44/79] [WebAssembly] Format WebAssembly ReleaseNote entries
 (#122203)

---
 llvm/docs/ReleaseNotes.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 3463dc8339fd8..4d5f5fb0384b8 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -240,15 +240,15 @@ Changes to the RISC-V Backend
 Changes to the WebAssembly Backend
 ----------------------------------
 
-The default target CPU, "generic", now enables the `-mnontrapping-fptoint`
-and `-mbulk-memory` flags, which correspond to the [Bulk Memory Operations]
-and [Non-trapping float-to-int Conversions] language features, which are
-[widely implemented in engines].
-
-A new Lime1 target CPU is added, -mcpu=lime1. This CPU follows the definition of
-the Lime1 CPU [here], and enables -mmultivalue, -mmutable-globals,
--mcall-indirect-overlong, -msign-ext, -mbulk-memory-opt, -mnontrapping-fptoint,
-and -mextended-const.
+* The default target CPU, "generic", now enables the `-mnontrapping-fptoint`
+  and `-mbulk-memory` flags, which correspond to the [Bulk Memory Operations]
+  and [Non-trapping float-to-int Conversions] language features, which are
+  [widely implemented in engines].
+
+* A new Lime1 target CPU is added, `-mcpu=lime1`. This CPU follows the
+  definition of the Lime1 CPU [here], and enables `-mmultivalue`,
+  `-mmutable-globals`, `-mcall-indirect-overlong`, `-msign-ext`,
+  `-mbulk-memory-opt`, `-mnontrapping-fptoint`, and `-mextended-const`.
 
 [Bulk Memory Operations]: https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md
 [Non-trapping float-to-int Conversions]: https://github.com/WebAssembly/spec/blob/master/proposals/nontrapping-float-to-int-conversion/Overview.md

From 1b897f737df2097f8fee1b203676ea7f01dff06d Mon Sep 17 00:00:00 2001
From: Maksim Ivanov <emaxx@google.com>
Date: Thu, 9 Jan 2025 20:40:31 +0100
Subject: [PATCH 45/79] [clang-tidy] Fix misc-unused-parameters on params with
 attrs (#122286)

Don't suggest to comment-out the parameter name if the parameter has an
attribute that's spelled after the parameter name.

This prevents the parameter's attributes from being wrongly applied to
the parameter's type.

This fixes #122191.
---
 .../clang-tidy/misc/UnusedParametersCheck.cpp | 19 +++++++++++++++++++
 .../checkers/misc/unused-parameters.cpp       | 16 ++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp
index c2d9286312dc4..d792b5c52191f 100644
--- a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp
@@ -9,8 +9,11 @@
 #include "UnusedParametersCheck.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTLambda.h"
+#include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/STLExtras.h"
 #include <unordered_map>
@@ -26,6 +29,17 @@ bool isOverrideMethod(const FunctionDecl *Function) {
     return MD->size_overridden_methods() > 0 || MD->hasAttr<OverrideAttr>();
   return false;
 }
+
+bool hasAttrAfterParam(const SourceManager *SourceManager,
+                       const ParmVarDecl *Param) {
+  for (const auto *Attr : Param->attrs()) {
+    if (SourceManager->isBeforeInTranslationUnit(Param->getLocation(),
+                                                 Attr->getLocation())) {
+      return true;
+    }
+  }
+  return false;
+}
 } // namespace
 
 void UnusedParametersCheck::registerMatchers(MatchFinder *Finder) {
@@ -189,6 +203,11 @@ void UnusedParametersCheck::check(const MatchFinder::MatchResult &Result) {
     if (Param->isUsed() || Param->isReferenced() || !Param->getDeclName() ||
         Param->hasAttr<UnusedAttr>())
       continue;
+    if (hasAttrAfterParam(Result.SourceManager, Param)) {
+      // Due to how grammar works, attributes would be wrongly applied to the
+      // type if we remove the preceding parameter name.
+      continue;
+    }
 
     // In non-strict mode ignore function definitions with empty bodies
     // (constructor initializer counts for non-empty body).
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp
index a3fcf30f273ef..524de45463e36 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp
@@ -33,6 +33,16 @@ void f(void (*fn)()) {;}
 // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: parameter 'fn' is unused [misc-unused-parameters]
 // CHECK-FIXES: {{^}}void f(void (* /*fn*/)()) {;}{{$}}
 
+int *k([[clang::lifetimebound]] int *i) {;}
+// CHECK-MESSAGES: :[[@LINE-1]]:38: warning: parameter 'i' is unused [misc-unused-parameters]
+// CHECK-FIXES: {{^}}int *k({{\[\[clang::lifetimebound\]\]}} int * /*i*/) {;}{{$}}
+
+#define ATTR_BEFORE(x) [[clang::lifetimebound]] x
+int* m(ATTR_BEFORE(const int *i)) { return nullptr; }
+// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: parameter 'i' is unused [misc-unused-parameters]
+// CHECK-FIXES: {{^}}int* m(ATTR_BEFORE(const int * /*i*/)) { return nullptr; }{{$}}
+#undef ATTR_BEFORE
+
 // Unchanged cases
 // ===============
 void f(int i); // Don't remove stuff in declarations
@@ -42,6 +52,12 @@ void s(int i[1]);
 void u(void (*fn)());
 void w(int i) { (void)i; } // Don't remove used parameters
 
+// Don't reanchor the attribute to the type:
+int *x(int *i [[clang::lifetimebound]]) { return nullptr; }
+#define ATTR_AFTER(x) x [[clang::lifetimebound]]
+int* y(ATTR_AFTER(const int *i)) { return nullptr; }
+#undef ATTR_AFTER
+
 bool useLambda(int (*fn)(int));
 static bool static_var = useLambda([] (int a) { return a; });
 

From 0aa831e0edb1c1deabb96ce2435667cc82bac79b Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak@amd.com>
Date: Thu, 9 Jan 2025 11:42:22 -0800
Subject: [PATCH 46/79] [mlir][GPU] Implement ValueBoundsOpInterface for GPU ID
 operations (#122190)

The GPU ID operations already implement InferIntRangeInterface, which
gives constant lower and upper bounds on those IDs when appropriate
metadata is prentent on the operations or in the surrounding context.

This commit uses that existing code to implement the
ValueBoundsOpInterface, which is used when analyzing affine operations
(unlike the integer range interface, which is used for arithmetic
optimization).

It also implements the interface for gpu.launch, where we can use it to
express the constraint that block/grid sizes are equal to their value
from outside the launch op and that the corresponding IDs are bounded
above by that size.

As a consequence, the test pass for this inference is updated to work on
a FunctionOpInterface and not a func.func, creating minor churn in other
tests.
---
 .../GPU/IR/ValueBoundsOpInterfaceImpl.h       |  19 +++
 mlir/include/mlir/InitAllDialects.h           |   2 +
 mlir/lib/Dialect/GPU/CMakeLists.txt           |   3 +-
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp        |   5 +
 .../GPU/IR/ValueBoundsOpInterfaceImpl.cpp     | 114 +++++++++++++
 .../value-bounds-op-interface-impl.mlir       |   2 +-
 .../Affine/value-bounds-reification.mlir      |   4 +-
 .../Arith/value-bounds-op-interface-impl.mlir |   4 +-
 .../GPU/value-bounds-op-interface-impl.mlir   | 159 ++++++++++++++++++
 .../value-bounds-op-interface-impl.mlir       |   2 +-
 .../value-bounds-op-interface-impl.mlir       |   2 +-
 .../SCF/value-bounds-op-interface-impl.mlir   |   2 +-
 .../value-bounds-op-interface-impl.mlir       |   2 +-
 .../Dialect/Vector/test-scalable-bounds.mlir  |   2 +-
 .../value-bounds-op-interface-impl.mlir       |   2 +-
 .../Dialect/Affine/TestReifyValueBounds.cpp   |   8 +-
 16 files changed, 317 insertions(+), 15 deletions(-)
 create mode 100644 mlir/include/mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h
 create mode 100644 mlir/lib/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.cpp
 create mode 100644 mlir/test/Dialect/GPU/value-bounds-op-interface-impl.mlir

diff --git a/mlir/include/mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h b/mlir/include/mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h
new file mode 100644
index 0000000000000..9a4e159ef76c8
--- /dev/null
+++ b/mlir/include/mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h
@@ -0,0 +1,19 @@
+//===- ValueBoundsOpInterfaceImpl.h - Impl. of ValueBoundsOpInterface -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H
+#define MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H
+
+namespace mlir {
+class DialectRegistry;
+
+namespace gpu {
+void registerValueBoundsOpInterfaceExternalModels(DialectRegistry &registry);
+} // namespace gpu
+} // namespace mlir
+#endif // MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index 7fd0432ddce1b..c102f811cce4b 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -37,6 +37,7 @@
 #include "mlir/Dialect/EmitC/IR/EmitC.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h"
 #include "mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h"
 #include "mlir/Dialect/IRDL/IR/IRDL.h"
 #include "mlir/Dialect/Index/IR/IndexDialect.h"
@@ -164,6 +165,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
   cf::registerBufferizableOpInterfaceExternalModels(registry);
   cf::registerBufferDeallocationOpInterfaceExternalModels(registry);
   gpu::registerBufferDeallocationOpInterfaceExternalModels(registry);
+  gpu::registerValueBoundsOpInterfaceExternalModels(registry);
   LLVM::registerInlinerInterface(registry);
   linalg::registerAllDialectInterfaceImplementations(registry);
   linalg::registerRuntimeVerifiableOpInterfaceExternalModels(registry);
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 1026e9b509332..013311ec027da 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_mlir_dialect_library(MLIRGPUDialect
   IR/GPUDialect.cpp
   IR/InferIntRangeInterfaceImpls.cpp
+  IR/ValueBoundsOpInterfaceImpl.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
@@ -40,7 +41,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
   Transforms/ShuffleRewriter.cpp
   Transforms/SPIRVAttachTarget.cpp
   Transforms/SubgroupReduceLowering.cpp
-  
+
   OBJECT
 
   ADDITIONAL_HEADER_DIRS
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 8e36638d6e545..49209229259a7 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -29,6 +29,7 @@
 #include "mlir/IR/TypeUtilities.h"
 #include "mlir/Interfaces/FunctionImplementation.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
 #include "mlir/Transforms/InliningUtils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/TypeSwitch.h"
@@ -217,6 +218,10 @@ void GPUDialect::initialize() {
   addInterfaces<GPUInlinerInterface>();
   declarePromisedInterface<bufferization::BufferDeallocationOpInterface,
                            TerminatorOp>();
+  declarePromisedInterfaces<
+      ValueBoundsOpInterface, ClusterDimOp, ClusterDimBlocksOp, ClusterIdOp,
+      ClusterBlockIdOp, BlockDimOp, BlockIdOp, GridDimOp, ThreadIdOp, LaneIdOp,
+      SubgroupIdOp, GlobalIdOp, NumSubgroupsOp, SubgroupSizeOp, LaunchOp>();
 }
 
 static std::string getSparseHandleKeyword(SparseHandleKind kind) {
diff --git a/mlir/lib/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.cpp
new file mode 100644
index 0000000000000..3bb7082daa5a0
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.cpp
@@ -0,0 +1,114 @@
+//===- ValueBoundsOpInterfaceImpl.cpp - Impl. of ValueBoundsOpInterface ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h"
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Interfaces/InferIntRangeInterface.h"
+#include "mlir/Interfaces/ValueBoundsOpInterface.h"
+
+using namespace mlir;
+using namespace mlir::gpu;
+
+namespace {
+/// Implement ValueBoundsOpInterface (which only works on index-typed values,
+/// gathers a set of constraint expressions, and is used for affine analyses)
+/// in terms of InferIntRangeInterface (which works
+/// on arbitrary integer types, creates [min, max] ranges, and is used in for
+/// arithmetic simplification).
+template <typename Op>
+struct GpuIdOpInterface
+    : public ValueBoundsOpInterface::ExternalModel<GpuIdOpInterface<Op>, Op> {
+  void populateBoundsForIndexValue(Operation *op, Value value,
+                                   ValueBoundsConstraintSet &cstr) const {
+    auto inferrable = cast<InferIntRangeInterface>(op);
+    assert(value == op->getResult(0) &&
+           "inferring for value that isn't the GPU op's result");
+    auto translateConstraint = [&](Value v, const ConstantIntRanges &range) {
+      assert(v == value &&
+             "GPU ID op inferring values for something that's not its result");
+      cstr.bound(v) >= range.smin().getSExtValue();
+      cstr.bound(v) <= range.smax().getSExtValue();
+    };
+    assert(inferrable->getNumOperands() == 0 && "ID ops have no operands");
+    inferrable.inferResultRanges({}, translateConstraint);
+  }
+};
+
+struct GpuLaunchOpInterface
+    : public ValueBoundsOpInterface::ExternalModel<GpuLaunchOpInterface,
+                                                   LaunchOp> {
+  void populateBoundsForIndexValue(Operation *op, Value value,
+                                   ValueBoundsConstraintSet &cstr) const {
+    auto launchOp = cast<LaunchOp>(op);
+
+    Value sizeArg = nullptr;
+    bool isSize = false;
+    KernelDim3 gridSizeArgs = launchOp.getGridSizeOperandValues();
+    KernelDim3 blockSizeArgs = launchOp.getBlockSizeOperandValues();
+
+    auto match = [&](KernelDim3 bodyArgs, KernelDim3 externalArgs,
+                     bool areSizeArgs) {
+      if (value == bodyArgs.x) {
+        sizeArg = externalArgs.x;
+        isSize = areSizeArgs;
+      }
+      if (value == bodyArgs.y) {
+        sizeArg = externalArgs.y;
+        isSize = areSizeArgs;
+      }
+      if (value == bodyArgs.z) {
+        sizeArg = externalArgs.z;
+        isSize = areSizeArgs;
+      }
+    };
+    match(launchOp.getThreadIds(), blockSizeArgs, false);
+    match(launchOp.getBlockSize(), blockSizeArgs, true);
+    match(launchOp.getBlockIds(), gridSizeArgs, false);
+    match(launchOp.getGridSize(), gridSizeArgs, true);
+    if (launchOp.hasClusterSize()) {
+      KernelDim3 clusterSizeArgs = *launchOp.getClusterSizeOperandValues();
+      match(*launchOp.getClusterIds(), clusterSizeArgs, false);
+      match(*launchOp.getClusterSize(), clusterSizeArgs, true);
+    }
+
+    if (!sizeArg)
+      return;
+    if (isSize) {
+      cstr.bound(value) == cstr.getExpr(sizeArg);
+      cstr.bound(value) >= 1;
+    } else {
+      cstr.bound(value) < cstr.getExpr(sizeArg);
+      cstr.bound(value) >= 0;
+    }
+  }
+};
+} // namespace
+
+void mlir::gpu::registerValueBoundsOpInterfaceExternalModels(
+    DialectRegistry &registry) {
+  registry.addExtension(+[](MLIRContext *ctx, GPUDialect *dialect) {
+#define REGISTER(X) X::attachInterface<GpuIdOpInterface<X>>(*ctx);
+    REGISTER(ClusterDimOp)
+    REGISTER(ClusterDimBlocksOp)
+    REGISTER(ClusterIdOp)
+    REGISTER(ClusterBlockIdOp)
+    REGISTER(BlockDimOp)
+    REGISTER(BlockIdOp)
+    REGISTER(GridDimOp)
+    REGISTER(ThreadIdOp)
+    REGISTER(LaneIdOp)
+    REGISTER(SubgroupIdOp)
+    REGISTER(GlobalIdOp)
+    REGISTER(NumSubgroupsOp)
+    REGISTER(SubgroupSizeOp)
+#undef REGISTER
+
+    LaunchOp::attachInterface<GpuLaunchOpInterface>(*ctx);
+  });
+}
diff --git a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir
index 5354eb38d7b03..a4310b91a37b3 100644
--- a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir
+++ b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
 // RUN:     -split-input-file | FileCheck %s
 
 // CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 + s1)>
diff --git a/mlir/test/Dialect/Affine/value-bounds-reification.mlir b/mlir/test/Dialect/Affine/value-bounds-reification.mlir
index 75622f59af83b..817614be50533 100644
--- a/mlir/test/Dialect/Affine/value-bounds-reification.mlir
+++ b/mlir/test/Dialect/Affine/value-bounds-reification.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args}))' \
 // RUN:     -verify-diagnostics -split-input-file | FileCheck %s
 
-// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args use-arith-ops" \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args use-arith-ops}))' \
 // RUN:     -verify-diagnostics -split-input-file | FileCheck %s --check-prefix=CHECK-ARITH
 
 // CHECK-LABEL: func @reify_through_chain(
diff --git a/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir
index 8fb3ba1a1ecce..a2653d4750ec8 100644
--- a/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir
+++ b/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
 // RUN:     -verify-diagnostics -split-input-file | FileCheck %s
 
-// RUN: mlir-opt %s -test-affine-reify-value-bounds="use-arith-ops" \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{use-arith-ops}))' \
 // RUN:     -verify-diagnostics -split-input-file | \
 // RUN: FileCheck %s --check-prefix=CHECK-ARITH
 
diff --git a/mlir/test/Dialect/GPU/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/GPU/value-bounds-op-interface-impl.mlir
new file mode 100644
index 0000000000000..6facf1e22aab9
--- /dev/null
+++ b/mlir/test/Dialect/GPU/value-bounds-op-interface-impl.mlir
@@ -0,0 +1,159 @@
+// RUN: mlir-opt %s -pass-pipeline='builtin.module( \
+// RUN:       func.func(test-affine-reify-value-bounds), \
+// RUN:       gpu.module(llvm.func(test-affine-reify-value-bounds)), \
+// RUN:       gpu.module(gpu.func(test-affine-reify-value-bounds)))' \
+// RUN:     -verify-diagnostics \
+// RUN:     -split-input-file | FileCheck %s
+
+// CHECK-LABEL: func @launch_func
+func.func @launch_func(%arg0 : index) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c2 = arith.constant 2 : index
+  %c4 = arith.constant 4 : index
+  %c64 = arith.constant 64 : index
+  gpu.launch blocks(%block_id_x, %block_id_y, %block_id_z) in (%grid_dim_x = %arg0, %grid_dim_y = %c4, %grid_dim_z = %c2)
+      threads(%thread_id_x, %thread_id_y, %thread_id_z) in (%block_dim_x = %c64, %block_dim_y = %c4, %block_dim_z = %c2) {
+
+    // Sanity checks:
+    // expected-error @below{{unknown}}
+    "test.compare" (%thread_id_x, %c1) {cmp = "EQ"} : (index, index) -> ()
+    // expected-remark @below{{false}}
+    "test.compare" (%thread_id_x, %c64) {cmp = "GE"} : (index, index) -> ()
+
+    // expected-remark @below{{true}}
+    "test.compare" (%grid_dim_x, %c1) {cmp = "GE"}  : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare" (%grid_dim_x, %arg0) {cmp = "EQ"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare" (%grid_dim_y, %c4) {cmp = "EQ"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare" (%grid_dim_z, %c2) {cmp = "EQ"} : (index, index) -> ()
+
+    // expected-remark @below{{true}}
+    "test.compare"(%block_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare"(%block_id_x, %arg0) {cmp = "LT"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare"(%block_id_y, %c0) {cmp = "GE"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare"(%block_id_y, %c4) {cmp = "LT"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare"(%block_id_z, %c0) {cmp = "GE"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare"(%block_id_z, %c2) {cmp = "LT"} : (index, index) -> ()
+
+    // expected-remark @below{{true}}
+    "test.compare" (%block_dim_x, %c64) {cmp = "EQ"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare" (%block_dim_y, %c4) {cmp = "EQ"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare" (%block_dim_z, %c2) {cmp = "EQ"} : (index, index) -> ()
+
+    // expected-remark @below{{true}}
+    "test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare"(%thread_id_x, %c64) {cmp = "LT"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare"(%thread_id_y, %c0) {cmp = "GE"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare"(%thread_id_y, %c4) {cmp = "LT"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare"(%thread_id_z, %c0) {cmp = "GE"} : (index, index) -> ()
+    // expected-remark @below{{true}}
+    "test.compare"(%thread_id_z, %c2) {cmp = "LT"} : (index, index) -> ()
+
+    // expected-remark @below{{true}}
+    "test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> ()
+    gpu.terminator
+  }
+
+  func.return
+}
+
+// -----
+
+// The tests for what the ranges are are located in int-range-interface.mlir,
+// so here we just make sure that the results of that interface propagate into
+// constraints.
+
+// CHECK-LABEL: func @kernel
+module attributes {gpu.container_module} {
+  gpu.module @gpu_module {
+    llvm.func @kernel() attributes {gpu.kernel} {
+
+      %c0 = arith.constant 0 : index
+      %ctid_max = arith.constant 4294967295 : index
+      %thread_id_x = gpu.thread_id x
+
+      // expected-remark @below{{true}}
+      "test.compare" (%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
+      // expected-remark @below{{true}}
+      "test.compare" (%thread_id_x, %ctid_max) {cmp = "LT"} : (index, index) -> ()
+      llvm.return
+    }
+  }
+}
+
+// -----
+
+// CHECK-LABEL: func @annotated_kernel
+module attributes {gpu.container_module} {
+  gpu.module @gpu_module {
+    gpu.func @annotated_kernel() kernel
+      attributes {known_block_size = array<i32: 8, 12, 16>,
+          known_grid_size = array<i32: 20, 24, 28>} {
+
+      %c0 = arith.constant 0 : index
+      %c8 = arith.constant 8 : index
+      %thread_id_x = gpu.thread_id x
+
+      // expected-remark @below{{true}}
+      "test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
+      // expected-remark @below{{true}}
+      "test.compare"(%thread_id_x, %c8) {cmp = "LT"} : (index, index) -> ()
+
+      %block_dim_x = gpu.block_dim x
+      // expected-remark @below{{true}}
+      "test.compare"(%block_dim_x, %c8) {cmp = "EQ"} : (index, index) -> ()
+
+      // expected-remark @below{{true}}
+      "test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> ()
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+// CHECK-LABEL: func @local_bounds_kernel
+module attributes {gpu.container_module} {
+  gpu.module @gpu_module {
+    gpu.func @local_bounds_kernel() kernel {
+
+      %c0 = arith.constant 0 : index
+      %c1 = arith.constant 1 : index
+      %c8 = arith.constant 8 : index
+
+      %block_dim_x = gpu.block_dim x upper_bound 8
+      // expected-remark @below{{true}}
+      "test.compare"(%block_dim_x, %c1) {cmp = "GE"} : (index, index) -> ()
+      // expected-remark @below{{true}}
+      "test.compare"(%block_dim_x, %c8) {cmp = "LE"} : (index, index) -> ()
+      // expected-error @below{{unknown}}
+      "test.compare"(%block_dim_x, %c8) {cmp = "EQ"} : (index, index) -> ()
+
+      %thread_id_x = gpu.thread_id x upper_bound 8
+      // expected-remark @below{{true}}
+      "test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
+      // expected-remark @below{{true}}
+      "test.compare"(%thread_id_x, %c8) {cmp = "LT"} : (index, index) -> ()
+
+      // Note: there isn't a way to express the ID <= size constraint
+      // in this form
+      // expected-error @below{{unknown}}
+      "test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> ()
+      gpu.return
+    }
+  }
+}
diff --git a/mlir/test/Dialect/Linalg/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Linalg/value-bounds-op-interface-impl.mlir
index 189c8e649ba5e..bcd330443cc44 100644
--- a/mlir/test/Dialect/Linalg/value-bounds-op-interface-impl.mlir
+++ b/mlir/test/Dialect/Linalg/value-bounds-op-interface-impl.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
 // RUN:     -split-input-file | FileCheck %s
 
 // CHECK-LABEL: func @linalg_fill(
diff --git a/mlir/test/Dialect/MemRef/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/MemRef/value-bounds-op-interface-impl.mlir
index 0e0f216b05d48..dc311c6b59ea4 100644
--- a/mlir/test/Dialect/MemRef/value-bounds-op-interface-impl.mlir
+++ b/mlir/test/Dialect/MemRef/value-bounds-op-interface-impl.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
 // RUN:     -split-input-file | FileCheck %s
 
 // CHECK-LABEL: func @memref_alloc(
diff --git a/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir
index 65e1017e62c1a..6e0c16a9a2b33 100644
--- a/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir
+++ b/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args}))' \
 // RUN:     -verify-diagnostics -split-input-file | FileCheck %s
 
 // CHECK-LABEL: func @scf_for(
diff --git a/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir
index 0ba9983723a0a..c0f64d3c84361 100644
--- a/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir
+++ b/mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
 // RUN:     -split-input-file | FileCheck %s
 
 func.func @unknown_op() -> index {
diff --git a/mlir/test/Dialect/Vector/test-scalable-bounds.mlir b/mlir/test/Dialect/Vector/test-scalable-bounds.mlir
index 6af904beb660b..ddbd805b1cdec 100644
--- a/mlir/test/Dialect/Vector/test-scalable-bounds.mlir
+++ b/mlir/test/Dialect/Vector/test-scalable-bounds.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-affine-reify-value-bounds -cse -verify-diagnostics \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds, cse))' -verify-diagnostics \
 // RUN:   -verify-diagnostics -split-input-file | FileCheck %s
 
 #map_dim_i = affine_map<(d0)[s0] -> (-d0 + 32400, s0)>
diff --git a/mlir/test/Dialect/Vector/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Vector/value-bounds-op-interface-impl.mlir
index c04c82970f9c0..1a94bbac9dff8 100644
--- a/mlir/test/Dialect/Vector/value-bounds-op-interface-impl.mlir
+++ b/mlir/test/Dialect/Vector/value-bounds-op-interface-impl.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
 // RUN:     -split-input-file | FileCheck %s
 
 // CHECK-LABEL: func @vector_transfer_write(
diff --git a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp
index 34513cd418e4c..2c954ffc4acef 100644
--- a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.h"
 #include "mlir/IR/PatternMatch.h"
+#include "mlir/Interfaces/FunctionInterfaces.h"
 #include "mlir/Interfaces/ValueBoundsOpInterface.h"
 #include "mlir/Pass/Pass.h"
 
@@ -30,7 +31,8 @@ namespace {
 
 /// This pass applies the permutation on the first maximal perfect nest.
 struct TestReifyValueBounds
-    : public PassWrapper<TestReifyValueBounds, OperationPass<func::FuncOp>> {
+    : public PassWrapper<TestReifyValueBounds,
+                         InterfacePass<FunctionOpInterface>> {
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestReifyValueBounds)
 
   StringRef getArgument() const final { return PASS_NAME; }
@@ -74,7 +76,7 @@ invertComparisonOperator(ValueBoundsConstraintSet::ComparisonOperator cmp) {
 
 /// Look for "test.reify_bound" ops in the input and replace their results with
 /// the reified values.
-static LogicalResult testReifyValueBounds(func::FuncOp funcOp,
+static LogicalResult testReifyValueBounds(FunctionOpInterface funcOp,
                                           bool reifyToFuncArgs,
                                           bool useArithOps) {
   IRRewriter rewriter(funcOp.getContext());
@@ -156,7 +158,7 @@ static LogicalResult testReifyValueBounds(func::FuncOp funcOp,
 }
 
 /// Look for "test.compare" ops and emit errors/remarks.
-static LogicalResult testEquality(func::FuncOp funcOp) {
+static LogicalResult testEquality(FunctionOpInterface funcOp) {
   IRRewriter rewriter(funcOp.getContext());
   WalkResult result = funcOp.walk([&](test::CompareOp op) {
     auto cmpType = op.getComparisonOperator();

From 6312beef788a209dc7d73c2c10b36197dab1cff3 Mon Sep 17 00:00:00 2001
From: vporpo <vporpodas@google.com>
Date: Thu, 9 Jan 2025 11:53:48 -0800
Subject: [PATCH 47/79] [SandboxVec][BottomUpVec] Use SeedCollector and slice
 seeds (#120826)

With this patch we switch from the temporary dummy seeds to actual seeds
provided by the seed collector.
The seeds get sliced and each slice is used as the starting point for
vectorization.
---
 llvm/include/llvm/SandboxIR/Pass.h            |  6 +-
 llvm/include/llvm/SandboxIR/Utils.h           |  7 +-
 .../Vectorize/SandboxVectorizer/Legality.h    |  1 +
 .../Vectorize/SandboxVectorizer/Scheduler.h   |  7 ++
 .../SandboxVectorizer/SeedCollector.h         |  4 +-
 .../Vectorize/SandboxVectorizer/VecUtils.h    | 10 ++
 .../SandboxVectorizer/Passes/BottomUpVec.cpp  | 91 +++++++++++++++----
 .../SandboxVectorizer/SandboxVectorizer.cpp   |  2 +-
 .../SandboxVectorizer/SeedCollector.cpp       | 17 ++--
 .../SandboxVectorizer/bottomup_basic.ll       |  2 +-
 .../SandboxVectorizer/bottomup_seed_slice.ll  | 33 +++++++
 .../bottomup_seed_slice_pow2.ll               | 37 ++++++++
 .../SandboxVectorizer/VecUtilsTest.cpp        | 11 +++
 13 files changed, 196 insertions(+), 32 deletions(-)
 create mode 100644 llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll
 create mode 100644 llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll

diff --git a/llvm/include/llvm/SandboxIR/Pass.h b/llvm/include/llvm/SandboxIR/Pass.h
index 4f4eae87cd3ff..267389a8a87a2 100644
--- a/llvm/include/llvm/SandboxIR/Pass.h
+++ b/llvm/include/llvm/SandboxIR/Pass.h
@@ -16,6 +16,7 @@ namespace llvm {
 
 class AAResults;
 class ScalarEvolution;
+class TargetTransformInfo;
 
 namespace sandboxir {
 
@@ -25,15 +26,18 @@ class Region;
 class Analyses {
   AAResults *AA = nullptr;
   ScalarEvolution *SE = nullptr;
+  TargetTransformInfo *TTI = nullptr;
 
   Analyses() = default;
 
 public:
-  Analyses(AAResults &AA, ScalarEvolution &SE) : AA(&AA), SE(&SE) {}
+  Analyses(AAResults &AA, ScalarEvolution &SE, TargetTransformInfo &TTI)
+      : AA(&AA), SE(&SE), TTI(&TTI) {}
 
 public:
   AAResults &getAA() const { return *AA; }
   ScalarEvolution &getScalarEvolution() const { return *SE; }
+  TargetTransformInfo &getTTI() const { return *TTI; }
   /// For use by unit tests.
   static Analyses emptyForTesting() { return Analyses(); }
 };
diff --git a/llvm/include/llvm/SandboxIR/Utils.h b/llvm/include/llvm/SandboxIR/Utils.h
index a73498adea1d5..d58fe52214395 100644
--- a/llvm/include/llvm/SandboxIR/Utils.h
+++ b/llvm/include/llvm/SandboxIR/Utils.h
@@ -60,11 +60,16 @@ class Utils {
         getUnderlyingObject(LSI->getPointerOperand()->Val));
   }
 
+  /// \Returns the number of bits of \p Ty.
+  static unsigned getNumBits(Type *Ty, const DataLayout &DL) {
+    return DL.getTypeSizeInBits(Ty->LLVMTy);
+  }
+
   /// \Returns the number of bits required to represent the operands or return
   /// value of \p V in \p DL.
   static unsigned getNumBits(Value *V, const DataLayout &DL) {
     Type *Ty = getExpectedType(V);
-    return DL.getTypeSizeInBits(Ty->LLVMTy);
+    return getNumBits(Ty, DL);
   }
 
   /// \Returns the number of bits required to represent the operands or
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
index 63d6ef31c8645..233cf82a1b3df 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -177,6 +177,7 @@ class LegalityAnalysis {
   // TODO: Try to remove the SkipScheduling argument by refactoring the tests.
   const LegalityResult &canVectorize(ArrayRef<Value *> Bndl,
                                      bool SkipScheduling = false);
+  void clear() { Sched.clear(); }
 };
 
 } // namespace llvm::sandboxir
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
index 9b68d47ce39aa..3ec0ac0f78a74 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
@@ -147,6 +147,13 @@ class Scheduler {
   ~Scheduler() {}
 
   bool trySchedule(ArrayRef<Instruction *> Instrs);
+  /// Clear the scheduler's state, including the DAG.
+  void clear() {
+    Bndls.clear();
+    // TODO: clear view once it lands.
+    DAG.clear();
+    ScheduleTopItOpt = std::nullopt;
+  }
 
 #ifndef NDEBUG
   void dump(raw_ostream &OS) const;
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
index 6e16a84d832e5..73b2bdf8f181f 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -95,8 +95,8 @@ class SeedBundle {
   /// with a total size <= \p MaxVecRegBits, or an empty slice if the
   /// requirements cannot be met . If \p ForcePowOf2 is true, then the returned
   /// slice will have a total number of bits that is a power of 2.
-  MutableArrayRef<Instruction *>
-  getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2);
+  ArrayRef<Instruction *> getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
+                                   bool ForcePowOf2);
 
   /// \Returns the number of seed elements in the bundle.
   std::size_t size() const { return Seeds.size(); }
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
index fc9d67fcfcdec..28fa33656dd5f 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
@@ -133,6 +133,16 @@ class VecUtils {
     assert(tryGetCommonScalarType(Bndl) && "Expected common scalar type!");
     return ScalarTy;
   }
+  /// \Returns the first integer power of 2 that is <= Num.
+  static unsigned getFloorPowerOf2(unsigned Num) {
+    if (Num == 0)
+      return Num;
+    unsigned Mask = Num;
+    Mask >>= 1;
+    for (unsigned ShiftBy = 1; ShiftBy < sizeof(Num) * 8; ShiftBy <<= 1)
+      Mask |= Mask >> ShiftBy;
+    return Num & ~Mask;
+  }
 };
 
 } // namespace llvm::sandboxir
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index a2ea11be59b8e..18e072c17d202 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -8,29 +8,31 @@
 
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/SandboxIR/Function.h"
 #include "llvm/SandboxIR/Instruction.h"
 #include "llvm/SandboxIR/Module.h"
 #include "llvm/SandboxIR/Utils.h"
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
 
-namespace llvm::sandboxir {
+namespace llvm {
+
+static cl::opt<unsigned>
+    OverrideVecRegBits("sbvec-vec-reg-bits", cl::init(0), cl::Hidden,
+                       cl::desc("Override the vector register size in bits, "
+                                "which is otherwise found by querying TTI."));
+static cl::opt<bool>
+    AllowNonPow2("sbvec-allow-non-pow2", cl::init(false), cl::Hidden,
+                 cl::desc("Allow non-power-of-2 vectorization."));
+
+namespace sandboxir {
 
 BottomUpVec::BottomUpVec(StringRef Pipeline)
     : FunctionPass("bottom-up-vec"),
       RPM("rpm", Pipeline, SandboxVectorizerPassBuilder::createRegionPass) {}
 
-// TODO: This is a temporary function that returns some seeds.
-//       Replace this with SeedCollector's function when it lands.
-static llvm::SmallVector<Value *, 4> collectSeeds(BasicBlock &BB) {
-  llvm::SmallVector<Value *, 4> Seeds;
-  for (auto &I : BB)
-    if (auto *SI = llvm::dyn_cast<StoreInst>(&I))
-      Seeds.push_back(SI);
-  return Seeds;
-}
-
 static SmallVector<Value *, 4> getOperand(ArrayRef<Value *> Bndl,
                                           unsigned OpIdx) {
   SmallVector<Value *, 4> Operands;
@@ -265,6 +267,7 @@ Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl, unsigned Depth) {
 
 bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) {
   DeadInstrCandidates.clear();
+  Legality->clear();
   vectorizeRec(Bndl, /*Depth=*/0);
   tryEraseDeadInstrs();
   return Change;
@@ -275,17 +278,67 @@ bool BottomUpVec::runOnFunction(Function &F, const Analyses &A) {
       A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(),
       F.getContext());
   Change = false;
+  const auto &DL = F.getParent()->getDataLayout();
+  unsigned VecRegBits =
+      OverrideVecRegBits != 0
+          ? OverrideVecRegBits
+          : A.getTTI()
+                .getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
+                .getFixedValue();
+
   // TODO: Start from innermost BBs first
   for (auto &BB : F) {
-    // TODO: Replace with proper SeedCollector function.
-    auto Seeds = collectSeeds(BB);
-    // TODO: Slice Seeds into smaller chunks.
-    // TODO: If vectorization succeeds, run the RegionPassManager on the
-    // resulting region.
-    if (Seeds.size() >= 2)
-      Change |= tryVectorize(Seeds);
+    SeedCollector SC(&BB, A.getScalarEvolution());
+    for (SeedBundle &Seeds : SC.getStoreSeeds()) {
+      unsigned ElmBits =
+          Utils::getNumBits(VecUtils::getElementType(Utils::getExpectedType(
+                                Seeds[Seeds.getFirstUnusedElementIdx()])),
+                            DL);
+
+      auto DivideBy2 = [](unsigned Num) {
+        auto Floor = VecUtils::getFloorPowerOf2(Num);
+        if (Floor == Num)
+          return Floor / 2;
+        return Floor;
+      };
+      // Try to create the largest vector supported by the target. If it fails
+      // reduce the vector size by half.
+      for (unsigned SliceElms = std::min(VecRegBits / ElmBits,
+                                         Seeds.getNumUnusedBits() / ElmBits);
+           SliceElms >= 2u; SliceElms = DivideBy2(SliceElms)) {
+        if (Seeds.allUsed())
+          break;
+        // Keep trying offsets after FirstUnusedElementIdx, until we vectorize
+        // the slice. This could be quite expensive, so we enforce a limit.
+        for (unsigned Offset = Seeds.getFirstUnusedElementIdx(),
+                      OE = Seeds.size();
+             Offset + 1 < OE; Offset += 1) {
+          // Seeds are getting used as we vectorize, so skip them.
+          if (Seeds.isUsed(Offset))
+            continue;
+          if (Seeds.allUsed())
+            break;
+
+          auto SeedSlice =
+              Seeds.getSlice(Offset, SliceElms * ElmBits, !AllowNonPow2);
+          if (SeedSlice.empty())
+            continue;
+
+          assert(SeedSlice.size() >= 2 && "Should have been rejected!");
+
+          // TODO: If vectorization succeeds, run the RegionPassManager on the
+          // resulting region.
+
+          // TODO: Refactor to remove the unnecessary copy to SeedSliceVals.
+          SmallVector<Value *> SeedSliceVals(SeedSlice.begin(),
+                                             SeedSlice.end());
+          Change |= tryVectorize(SeedSliceVals);
+        }
+      }
+    }
   }
   return Change;
 }
 
-} // namespace llvm::sandboxir
+} // namespace sandboxir
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
index c22eb01d74a1c..a6e2b40000529 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
@@ -86,6 +86,6 @@ bool SandboxVectorizerPass::runImpl(Function &LLVMF) {
 
   // Create SandboxIR for LLVMF and run BottomUpVec on it.
   sandboxir::Function &F = *Ctx->createFunction(&LLVMF);
-  sandboxir::Analyses A(*AA, *SE);
+  sandboxir::Analyses A(*AA, *SE, *TTI);
   return FPM.runOnFunction(F, A);
 }
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
index 6ea34c5e0598d..a3ce663407c4a 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
@@ -31,9 +31,9 @@ cl::opt<unsigned> SeedGroupsLimit(
     cl::desc("Limit the number of collected seeds groups in a BB to "
              "cap compilation time."));
 
-MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
-                                                    unsigned MaxVecRegBits,
-                                                    bool ForcePowerOf2) {
+ArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
+                                             unsigned MaxVecRegBits,
+                                             bool ForcePowerOf2) {
   // Use uint32_t here for compatibility with IsPowerOf2_32
 
   // BitCount tracks the size of the working slice. From that we can tell
@@ -47,10 +47,13 @@ MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
   // Can't start a slice with a used instruction.
   assert(!isUsed(StartIdx) && "Expected unused at StartIdx");
   for (auto S : make_range(Seeds.begin() + StartIdx, Seeds.end())) {
+    // Stop if this instruction is used. This needs to be done before
+    // getNumBits() because a "used" instruction may have been erased.
+    if (isUsed(StartIdx + NumElements))
+      break;
     uint32_t InstBits = Utils::getNumBits(S);
-    // Stop if this instruction is used, or if adding it puts the slice over
-    // the limit.
-    if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits)
+    // Stop if adding it puts the slice over the limit.
+    if (BitCount + InstBits > MaxVecRegBits)
       break;
     NumElements++;
     BitCount += InstBits;
@@ -68,7 +71,7 @@ MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
          "Must be a power of two");
   // Return any non-empty slice
   if (NumElements > 1)
-    return MutableArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
+    return ArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
   else
     return {};
 }
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index 7422d287ff3e2..785d1f4ef666f 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=sandbox-vectorizer -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
 
 define void @store_load(ptr %ptr) {
 ; CHECK-LABEL: define void @store_load(
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll
new file mode 100644
index 0000000000000..46cda3c80aaa3
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+
+
+declare void @foo()
+define void @slice_seeds(ptr %ptr, float %val) {
+; CHECK-LABEL: define void @slice_seeds(
+; CHECK-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; CHECK-NEXT:    [[LD2:%.*]] = load float, ptr [[PTR2]], align 4
+; CHECK-NEXT:    store float [[LD2]], ptr [[PTR2]], align 4
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
+; CHECK-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr float, ptr %ptr, i32 0
+  %ptr1 = getelementptr float, ptr %ptr, i32 1
+  %ptr2 = getelementptr float, ptr %ptr, i32 2
+
+  %ld2 = load float, ptr %ptr2
+  store float %ld2, ptr %ptr2
+  ; This call blocks scheduling of all 3 stores.
+  call void @foo()
+
+  %ld0 = load float, ptr %ptr0
+  %ld1 = load float, ptr %ptr1
+  store float %ld0, ptr %ptr0
+  store float %ld1, ptr %ptr1
+  ret void
+}
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
new file mode 100644
index 0000000000000..22119c4491b92
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=false -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s --check-prefix=POW2
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=true -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s --check-prefix=NON-POW2
+
+define void @pow2(ptr %ptr, float %val) {
+; POW2-LABEL: define void @pow2(
+; POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; POW2-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; POW2-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; POW2-NEXT:    [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; POW2-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
+; POW2-NEXT:    [[LD2:%.*]] = load float, ptr [[PTR2]], align 4
+; POW2-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
+; POW2-NEXT:    store float [[LD2]], ptr [[PTR2]], align 4
+; POW2-NEXT:    ret void
+;
+; NON-POW2-LABEL: define void @pow2(
+; NON-POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; NON-POW2-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; NON-POW2-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; NON-POW2-NEXT:    [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; NON-POW2-NEXT:    [[PACK2:%.*]] = load <3 x float>, ptr [[PTR0]], align 4
+; NON-POW2-NEXT:    store <3 x float> [[PACK2]], ptr [[PTR0]], align 4
+; NON-POW2-NEXT:    ret void
+;
+  %ptr0 = getelementptr float, ptr %ptr, i32 0
+  %ptr1 = getelementptr float, ptr %ptr, i32 1
+  %ptr2 = getelementptr float, ptr %ptr, i32 2
+
+  %ld0 = load float, ptr %ptr0
+  %ld1 = load float, ptr %ptr1
+  %ld2 = load float, ptr %ptr2
+  store float %ld0, ptr %ptr0
+  store float %ld1, ptr %ptr1
+  store float %ld2, ptr %ptr2
+  ret void
+}
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
index cf7b6cbc7e55c..8661dcd5067c0 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
@@ -472,3 +472,14 @@ define void @foo(i8 %v, ptr %ptr) {
 #endif // NDEBUG
   }
 }
+
+TEST_F(VecUtilsTest, FloorPowerOf2) {
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(0), 0u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(1 << 0), 1u << 0);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(3), 2u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(4), 4u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(5), 4u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(7), 4u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(8), 8u);
+  EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(9), 8u);
+}

From 1739ba9bb5b5e0b2f3c7b381f7852b77a53ef3ba Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes@jdoerfert.de>
Date: Thu, 9 Jan 2025 11:57:11 -0800
Subject: [PATCH 48/79] [OpenMP][FIX] Adjust test to be non-flaky (#122331)

The test runs asynchronous kernels and depending on the timing the
output is slightly different. We now only check for the common parts of
the output.
---
 offload/test/sanitizer/kernel_crash_async.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/test/sanitizer/kernel_crash_async.c b/offload/test/sanitizer/kernel_crash_async.c
index ee22ba504018b..d51418f817889 100644
--- a/offload/test/sanitizer/kernel_crash_async.c
+++ b/offload/test/sanitizer/kernel_crash_async.c
@@ -36,4 +36,4 @@ int main(void) {
 // TRACE: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l29)
 // TRACE:     launchKernel
 //
-// CHECK: Kernel {{[0-9]}}: {{.*}} (__omp_offloading_{{.*}}_main_l29)
+// CHECK: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l29)

From b57c0bac81fe4f5c85c6554ca574cf2d5648e0c5 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Thu, 9 Jan 2025 13:57:39 -0600
Subject: [PATCH 49/79] [OpenMP] Update atomic helpers to just use headers
 (#122185)

Summary:
Previously we had some indirection here, this patch updates these
utilities to just be normal template functions. We use SFINAE to manage
the special case handling for floats. Also this strips address spaces so
it can be used more generally.
---
 offload/DeviceRTL/CMakeLists.txt            |   2 +-
 offload/DeviceRTL/include/DeviceUtils.h     |  41 +++++
 offload/DeviceRTL/include/Synchronization.h | 169 ++++++++++++++-----
 offload/DeviceRTL/src/Synchronization.cpp   | 175 +-------------------
 4 files changed, 170 insertions(+), 217 deletions(-)

diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 22940264f9b19..099634e211e7a 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -99,7 +99,7 @@ set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
               ${clang_opt_flags} --offload-device-only
              -nocudalib -nogpulib -nogpuinc -nostdlibinc
              -fopenmp -fopenmp-cuda-mode
-             -Wno-unknown-cuda-version
+             -Wno-unknown-cuda-version -Wno-openmp-target
              -DOMPTARGET_DEVICE_RUNTIME
              -I${include_directory}
              -I${devicertl_base_directory}/../include
diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h
index 549ca16e1c34c..fb00d6c755255 100644
--- a/offload/DeviceRTL/include/DeviceUtils.h
+++ b/offload/DeviceRTL/include/DeviceUtils.h
@@ -19,6 +19,47 @@
 
 namespace utils {
 
+template <typename T> struct type_identity {
+  using type = T;
+};
+
+template <typename T, T v> struct integral_constant {
+  inline static constexpr T value = v;
+};
+
+/// Freestanding SFINAE helpers.
+template <class T> struct remove_cv : type_identity<T> {};
+template <class T> struct remove_cv<const T> : type_identity<T> {};
+template <class T> struct remove_cv<volatile T> : type_identity<T> {};
+template <class T> struct remove_cv<const volatile T> : type_identity<T> {};
+template <class T> using remove_cv_t = typename remove_cv<T>::type;
+
+using true_type = integral_constant<bool, true>;
+using false_type = integral_constant<bool, false>;
+
+template <typename T, typename U> struct is_same : false_type {};
+template <typename T> struct is_same<T, T> : true_type {};
+template <typename T, typename U>
+inline constexpr bool is_same_v = is_same<T, U>::value;
+
+template <typename T> struct is_floating_point {
+  inline static constexpr bool value =
+      is_same_v<remove_cv<T>, float> || is_same_v<remove_cv<T>, double>;
+};
+template <typename T>
+inline constexpr bool is_floating_point_v = is_floating_point<T>::value;
+
+template <bool B, typename T = void> struct enable_if;
+template <typename T> struct enable_if<true, T> : type_identity<T> {};
+template <bool B, typename T = void>
+using enable_if_t = typename enable_if<B, T>::type;
+
+template <class T> struct remove_addrspace : type_identity<T> {};
+template <class T, int N>
+struct remove_addrspace<T [[clang::address_space(N)]]> : type_identity<T> {};
+template <class T>
+using remove_addrspace_t = typename remove_addrspace<T>::type;
+
 /// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
 /// is identified by \p Mask.
 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h
index 7a73f9ba72877..ae065850d824c 100644
--- a/offload/DeviceRTL/include/Synchronization.h
+++ b/offload/DeviceRTL/include/Synchronization.h
@@ -13,9 +13,11 @@
 #define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
 
 #include "DeviceTypes.h"
+#include "DeviceUtils.h"
 
-namespace ompx {
+#pragma omp begin declare target device_type(nohost)
 
+namespace ompx {
 namespace atomic {
 
 enum OrderingTy {
@@ -48,51 +50,124 @@ uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering,
 /// result is stored in \p *Addr;
 /// {
 
-#define ATOMIC_COMMON_OP(TY)                                                   \
-  TY add(TY *Addr, TY V, OrderingTy Ordering);                                 \
-  TY mul(TY *Addr, TY V, OrderingTy Ordering);                                 \
-  TY load(TY *Addr, OrderingTy Ordering);                                      \
-  void store(TY *Addr, TY V, OrderingTy Ordering);                             \
-  bool cas(TY *Addr, TY ExpectedV, TY DesiredV, OrderingTy OrderingSucc,       \
-           OrderingTy OrderingFail);
-
-#define ATOMIC_FP_ONLY_OP(TY)                                                  \
-  TY min(TY *Addr, TY V, OrderingTy Ordering);                                 \
-  TY max(TY *Addr, TY V, OrderingTy Ordering);
-
-#define ATOMIC_INT_ONLY_OP(TY)                                                 \
-  TY min(TY *Addr, TY V, OrderingTy Ordering);                                 \
-  TY max(TY *Addr, TY V, OrderingTy Ordering);                                 \
-  TY bit_or(TY *Addr, TY V, OrderingTy Ordering);                              \
-  TY bit_and(TY *Addr, TY V, OrderingTy Ordering);                             \
-  TY bit_xor(TY *Addr, TY V, OrderingTy Ordering);
-
-#define ATOMIC_FP_OP(TY)                                                       \
-  ATOMIC_FP_ONLY_OP(TY)                                                        \
-  ATOMIC_COMMON_OP(TY)
-
-#define ATOMIC_INT_OP(TY)                                                      \
-  ATOMIC_INT_ONLY_OP(TY)                                                       \
-  ATOMIC_COMMON_OP(TY)
-
-// This needs to be kept in sync with the header. Also the reason we don't use
-// templates here.
-ATOMIC_INT_OP(int8_t)
-ATOMIC_INT_OP(int16_t)
-ATOMIC_INT_OP(int32_t)
-ATOMIC_INT_OP(int64_t)
-ATOMIC_INT_OP(uint8_t)
-ATOMIC_INT_OP(uint16_t)
-ATOMIC_INT_OP(uint32_t)
-ATOMIC_INT_OP(uint64_t)
-ATOMIC_FP_OP(float)
-ATOMIC_FP_OP(double)
-
-#undef ATOMIC_INT_ONLY_OP
-#undef ATOMIC_FP_ONLY_OP
-#undef ATOMIC_COMMON_OP
-#undef ATOMIC_INT_OP
-#undef ATOMIC_FP_OP
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+bool cas(Ty *Address, V ExpectedV, V DesiredV, atomic::OrderingTy OrderingSucc,
+         atomic::OrderingTy OrderingFail) {
+  return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false,
+                                          OrderingSucc, OrderingFail,
+                                          __MEMORY_SCOPE_DEVICE);
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+V add(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  return __scoped_atomic_fetch_add(Address, Val, Ordering,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+V load(Ty *Address, atomic::OrderingTy Ordering) {
+  return add(Address, Ty(0), Ordering);
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+void store(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  __scoped_atomic_store_n(Address, Val, Ordering, __MEMORY_SCOPE_DEVICE);
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+V mul(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  Ty TypedCurrentVal, TypedResultVal, TypedNewVal;
+  bool Success;
+  do {
+    TypedCurrentVal = atomic::load(Address, Ordering);
+    TypedNewVal = TypedCurrentVal * Val;
+    Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering,
+                          atomic::relaxed);
+  } while (!Success);
+  return TypedResultVal;
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+utils::enable_if_t<!utils::is_floating_point_v<V>, V>
+max(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  return __scoped_atomic_fetch_max(Address, Val, Ordering,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+utils::enable_if_t<utils::is_same_v<V, float>, V>
+max(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  if (Val >= 0)
+    return utils::convertViaPun<float>(
+        max((int32_t *)Address, utils::convertViaPun<int32_t>(Val), Ordering));
+  return utils::convertViaPun<float>(
+      min((uint32_t *)Address, utils::convertViaPun<uint32_t>(Val), Ordering));
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+utils::enable_if_t<utils::is_same_v<V, double>, V>
+max(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  if (Val >= 0)
+    return utils::convertViaPun<double>(
+        max((int64_t *)Address, utils::convertViaPun<int64_t>(Val), Ordering));
+  return utils::convertViaPun<double>(
+      min((uint64_t *)Address, utils::convertViaPun<uint64_t>(Val), Ordering));
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+utils::enable_if_t<!utils::is_floating_point_v<V>, V>
+min(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  return __scoped_atomic_fetch_min(Address, Val, Ordering,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
+// TODO: Implement this with __atomic_fetch_max and remove the duplication.
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+utils::enable_if_t<utils::is_same_v<V, float>, V>
+min(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  if (Val >= 0)
+    return utils::convertViaPun<float>(
+        min((int32_t *)Address, utils::convertViaPun<int32_t>(Val), Ordering));
+  return utils::convertViaPun<float>(
+      max((uint32_t *)Address, utils::convertViaPun<uint32_t>(Val), Ordering));
+}
+
+// TODO: Implement this with __atomic_fetch_max and remove the duplication.
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+utils::enable_if_t<utils::is_same_v<V, double>, V>
+min(Ty *Address, utils::remove_addrspace_t<Ty> Val,
+    atomic::OrderingTy Ordering) {
+  if (Val >= 0)
+    return utils::convertViaPun<double>(
+        min((int64_t *)Address, utils::convertViaPun<int64_t>(Val), Ordering));
+  return utils::convertViaPun<double>(
+      max((uint64_t *)Address, utils::convertViaPun<uint64_t>(Val), Ordering));
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+V bit_or(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  return __scoped_atomic_fetch_or(Address, Val, Ordering,
+                                  __MEMORY_SCOPE_DEVICE);
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+V bit_and(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  return __scoped_atomic_fetch_and(Address, Val, Ordering,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
+template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
+V bit_xor(Ty *Address, V Val, atomic::OrderingTy Ordering) {
+  return __scoped_atomic_fetch_xor(Address, Val, Ordering,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
+static inline uint32_t atomicExchange(uint32_t *Address, uint32_t Val,
+                                      atomic::OrderingTy Ordering) {
+  uint32_t R;
+  __scoped_atomic_exchange(Address, &Val, &R, Ordering, __MEMORY_SCOPE_DEVICE);
+  return R;
+}
 
 ///}
 
@@ -145,4 +220,6 @@ void system(atomic::OrderingTy Ordering);
 
 } // namespace ompx
 
+#pragma omp end declare target
+
 #endif
diff --git a/offload/DeviceRTL/src/Synchronization.cpp b/offload/DeviceRTL/src/Synchronization.cpp
index 3aee23a865d3c..72a97ae3fcfb4 100644
--- a/offload/DeviceRTL/src/Synchronization.cpp
+++ b/offload/DeviceRTL/src/Synchronization.cpp
@@ -31,95 +31,6 @@ namespace impl {
 /// NOTE: This function needs to be implemented by every target.
 uint32_t atomicInc(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering,
                    atomic::MemScopeTy MemScope);
-
-template <typename Ty>
-Ty atomicAdd(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_add(Address, Val, Ordering,
-                                   __MEMORY_SCOPE_DEVICE);
-}
-
-template <typename Ty>
-Ty atomicMul(Ty *Address, Ty V, atomic::OrderingTy Ordering) {
-  Ty TypedCurrentVal, TypedResultVal, TypedNewVal;
-  bool Success;
-  do {
-    TypedCurrentVal = atomic::load(Address, Ordering);
-    TypedNewVal = TypedCurrentVal * V;
-    Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering,
-                          atomic::relaxed);
-  } while (!Success);
-  return TypedResultVal;
-}
-
-template <typename Ty> Ty atomicLoad(Ty *Address, atomic::OrderingTy Ordering) {
-  return atomicAdd(Address, Ty(0), Ordering);
-}
-
-template <typename Ty>
-void atomicStore(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
-  __scoped_atomic_store_n(Address, Val, Ordering, __MEMORY_SCOPE_DEVICE);
-}
-
-template <typename Ty>
-bool atomicCAS(Ty *Address, Ty ExpectedV, Ty DesiredV,
-               atomic::OrderingTy OrderingSucc,
-               atomic::OrderingTy OrderingFail) {
-  return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false,
-                                          OrderingSucc, OrderingFail,
-                                          __MEMORY_SCOPE_DEVICE);
-}
-
-template <typename Ty>
-Ty atomicMin(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_min(Address, Val, Ordering,
-                                   __MEMORY_SCOPE_DEVICE);
-}
-
-template <typename Ty>
-Ty atomicMax(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_max(Address, Val, Ordering,
-                                   __MEMORY_SCOPE_DEVICE);
-}
-
-// TODO: Implement this with __atomic_fetch_max and remove the duplication.
-template <typename Ty, typename STy, typename UTy>
-Ty atomicMinFP(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
-  if (Val >= 0)
-    return atomicMin((STy *)Address, utils::convertViaPun<STy>(Val), Ordering);
-  return atomicMax((UTy *)Address, utils::convertViaPun<UTy>(Val), Ordering);
-}
-
-template <typename Ty, typename STy, typename UTy>
-Ty atomicMaxFP(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
-  if (Val >= 0)
-    return atomicMax((STy *)Address, utils::convertViaPun<STy>(Val), Ordering);
-  return atomicMin((UTy *)Address, utils::convertViaPun<UTy>(Val), Ordering);
-}
-
-template <typename Ty>
-Ty atomicOr(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_or(Address, Val, Ordering,
-                                  __MEMORY_SCOPE_DEVICE);
-}
-
-template <typename Ty>
-Ty atomicAnd(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_and(Address, Val, Ordering,
-                                   __MEMORY_SCOPE_DEVICE);
-}
-
-template <typename Ty>
-Ty atomicXOr(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
-  return __scoped_atomic_fetch_xor(Address, Val, Ordering,
-                                   __MEMORY_SCOPE_DEVICE);
-}
-
-uint32_t atomicExchange(uint32_t *Address, uint32_t Val,
-                        atomic::OrderingTy Ordering) {
-  uint32_t R;
-  __scoped_atomic_exchange(Address, &Val, &R, Ordering, __MEMORY_SCOPE_DEVICE);
-  return R;
-}
 ///}
 
 // Forward declarations defined to be defined for AMDGCN and NVPTX.
@@ -279,8 +190,8 @@ void setCriticalLock(omp_lock_t *Lock) {
   uint64_t LowestActiveThread = utils::ffs(mapping::activemask()) - 1;
   if (mapping::getThreadIdInWarp() == LowestActiveThread) {
     fenceKernel(atomic::release);
-    while (!atomicCAS((uint32_t *)Lock, UNSET, SET, atomic::relaxed,
-                      atomic::relaxed)) {
+    while (
+        !cas((uint32_t *)Lock, UNSET, SET, atomic::relaxed, atomic::relaxed)) {
       __builtin_amdgcn_s_sleep(32);
     }
     fenceKernel(atomic::aquire);
@@ -341,7 +252,7 @@ void unsetLock(omp_lock_t *Lock) {
 }
 
 int testLock(omp_lock_t *Lock) {
-  return atomicAdd((uint32_t *)Lock, 0u, atomic::seq_cst);
+  return atomic::add((uint32_t *)Lock, 0u, atomic::seq_cst);
 }
 
 void initLock(omp_lock_t *Lock) { unsetLock(Lock); }
@@ -350,8 +261,8 @@ void destroyLock(omp_lock_t *Lock) { unsetLock(Lock); }
 
 void setLock(omp_lock_t *Lock) {
   // TODO: not sure spinning is a good idea here..
-  while (atomicCAS((uint32_t *)Lock, UNSET, SET, atomic::seq_cst,
-                   atomic::seq_cst) != UNSET) {
+  while (atomic::cas((uint32_t *)Lock, UNSET, SET, atomic::seq_cst,
+                     atomic::seq_cst) != UNSET) {
     int32_t start = __nvvm_read_ptx_sreg_clock();
     int32_t now;
     for (;;) {
@@ -394,82 +305,6 @@ void fence::kernel(atomic::OrderingTy Ordering) { impl::fenceKernel(Ordering); }
 
 void fence::system(atomic::OrderingTy Ordering) { impl::fenceSystem(Ordering); }
 
-#define ATOMIC_COMMON_OP(TY)                                                   \
-  TY atomic::add(TY *Addr, TY V, atomic::OrderingTy Ordering) {                \
-    return impl::atomicAdd(Addr, V, Ordering);                                 \
-  }                                                                            \
-  TY atomic::mul(TY *Addr, TY V, atomic::OrderingTy Ordering) {                \
-    return impl::atomicMul(Addr, V, Ordering);                                 \
-  }                                                                            \
-  TY atomic::load(TY *Addr, atomic::OrderingTy Ordering) {                     \
-    return impl::atomicLoad(Addr, Ordering);                                   \
-  }                                                                            \
-  bool atomic::cas(TY *Addr, TY ExpectedV, TY DesiredV,                        \
-                   atomic::OrderingTy OrderingSucc,                            \
-                   atomic::OrderingTy OrderingFail) {                          \
-    return impl::atomicCAS(Addr, ExpectedV, DesiredV, OrderingSucc,            \
-                           OrderingFail);                                      \
-  }
-
-#define ATOMIC_FP_ONLY_OP(TY, STY, UTY)                                        \
-  TY atomic::min(TY *Addr, TY V, atomic::OrderingTy Ordering) {                \
-    return impl::atomicMinFP<TY, STY, UTY>(Addr, V, Ordering);                 \
-  }                                                                            \
-  TY atomic::max(TY *Addr, TY V, atomic::OrderingTy Ordering) {                \
-    return impl::atomicMaxFP<TY, STY, UTY>(Addr, V, Ordering);                 \
-  }                                                                            \
-  void atomic::store(TY *Addr, TY V, atomic::OrderingTy Ordering) {            \
-    impl::atomicStore(reinterpret_cast<UTY *>(Addr),                           \
-                      utils::convertViaPun<UTY>(V), Ordering);                 \
-  }
-
-#define ATOMIC_INT_ONLY_OP(TY)                                                 \
-  TY atomic::min(TY *Addr, TY V, atomic::OrderingTy Ordering) {                \
-    return impl::atomicMin<TY>(Addr, V, Ordering);                             \
-  }                                                                            \
-  TY atomic::max(TY *Addr, TY V, atomic::OrderingTy Ordering) {                \
-    return impl::atomicMax<TY>(Addr, V, Ordering);                             \
-  }                                                                            \
-  TY atomic::bit_or(TY *Addr, TY V, atomic::OrderingTy Ordering) {             \
-    return impl::atomicOr(Addr, V, Ordering);                                  \
-  }                                                                            \
-  TY atomic::bit_and(TY *Addr, TY V, atomic::OrderingTy Ordering) {            \
-    return impl::atomicAnd(Addr, V, Ordering);                                 \
-  }                                                                            \
-  TY atomic::bit_xor(TY *Addr, TY V, atomic::OrderingTy Ordering) {            \
-    return impl::atomicXOr(Addr, V, Ordering);                                 \
-  }                                                                            \
-  void atomic::store(TY *Addr, TY V, atomic::OrderingTy Ordering) {            \
-    impl::atomicStore(Addr, V, Ordering);                                      \
-  }
-
-#define ATOMIC_FP_OP(TY, STY, UTY)                                             \
-  ATOMIC_FP_ONLY_OP(TY, STY, UTY)                                              \
-  ATOMIC_COMMON_OP(TY)
-
-#define ATOMIC_INT_OP(TY)                                                      \
-  ATOMIC_INT_ONLY_OP(TY)                                                       \
-  ATOMIC_COMMON_OP(TY)
-
-// This needs to be kept in sync with the header. Also the reason we don't use
-// templates here.
-ATOMIC_INT_OP(int8_t)
-ATOMIC_INT_OP(int16_t)
-ATOMIC_INT_OP(int32_t)
-ATOMIC_INT_OP(int64_t)
-ATOMIC_INT_OP(uint8_t)
-ATOMIC_INT_OP(uint16_t)
-ATOMIC_INT_OP(uint32_t)
-ATOMIC_INT_OP(uint64_t)
-ATOMIC_FP_OP(float, int32_t, uint32_t)
-ATOMIC_FP_OP(double, int64_t, uint64_t)
-
-#undef ATOMIC_INT_ONLY_OP
-#undef ATOMIC_FP_ONLY_OP
-#undef ATOMIC_COMMON_OP
-#undef ATOMIC_INT_OP
-#undef ATOMIC_FP_OP
-
 uint32_t atomic::inc(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering,
                      atomic::MemScopeTy MemScope) {
   return impl::atomicInc(Addr, V, Ordering, MemScope);

From f53cb84df6b80458cb4d5ab7398a590356a3a952 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Thu, 9 Jan 2025 13:59:21 -0600
Subject: [PATCH 50/79] [OpenMP] Use __builtin_bit_cast instead of UB type
 punning (#122325)

Summary:
Use a normal bitcast, remove from the shared utils since it's not
available in
GCC 7.4
---
 offload/DeviceRTL/include/DeviceUtils.h     |  5 ++++
 offload/DeviceRTL/include/Synchronization.h | 32 ++++++++++-----------
 offload/DeviceRTL/src/Mapping.cpp           |  8 +++---
 offload/include/Shared/Utils.h              |  5 ----
 4 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h
index fb00d6c755255..fa66b973a4f5e 100644
--- a/offload/DeviceRTL/include/DeviceUtils.h
+++ b/offload/DeviceRTL/include/DeviceUtils.h
@@ -60,6 +60,11 @@ struct remove_addrspace<T [[clang::address_space(N)]]> : type_identity<T> {};
 template <class T>
 using remove_addrspace_t = typename remove_addrspace<T>::type;
 
+template <typename To, typename From> inline To bitCast(From V) {
+  static_assert(sizeof(To) == sizeof(From), "Bad conversion");
+  return __builtin_bit_cast(To, V);
+}
+
 /// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
 /// is identified by \p Mask.
 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h
index ae065850d824c..e1968675550d4 100644
--- a/offload/DeviceRTL/include/Synchronization.h
+++ b/offload/DeviceRTL/include/Synchronization.h
@@ -98,20 +98,20 @@ template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 utils::enable_if_t<utils::is_same_v<V, float>, V>
 max(Ty *Address, V Val, atomic::OrderingTy Ordering) {
   if (Val >= 0)
-    return utils::convertViaPun<float>(
-        max((int32_t *)Address, utils::convertViaPun<int32_t>(Val), Ordering));
-  return utils::convertViaPun<float>(
-      min((uint32_t *)Address, utils::convertViaPun<uint32_t>(Val), Ordering));
+    return utils::bitCast<float>(
+        max((int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering));
+  return utils::bitCast<float>(
+      min((uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering));
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 utils::enable_if_t<utils::is_same_v<V, double>, V>
 max(Ty *Address, V Val, atomic::OrderingTy Ordering) {
   if (Val >= 0)
-    return utils::convertViaPun<double>(
-        max((int64_t *)Address, utils::convertViaPun<int64_t>(Val), Ordering));
-  return utils::convertViaPun<double>(
-      min((uint64_t *)Address, utils::convertViaPun<uint64_t>(Val), Ordering));
+    return utils::bitCast<double>(
+        max((int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering));
+  return utils::bitCast<double>(
+      min((uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering));
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
@@ -126,10 +126,10 @@ template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
 utils::enable_if_t<utils::is_same_v<V, float>, V>
 min(Ty *Address, V Val, atomic::OrderingTy Ordering) {
   if (Val >= 0)
-    return utils::convertViaPun<float>(
-        min((int32_t *)Address, utils::convertViaPun<int32_t>(Val), Ordering));
-  return utils::convertViaPun<float>(
-      max((uint32_t *)Address, utils::convertViaPun<uint32_t>(Val), Ordering));
+    return utils::bitCast<float>(
+        min((int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering));
+  return utils::bitCast<float>(
+      max((uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering));
 }
 
 // TODO: Implement this with __atomic_fetch_max and remove the duplication.
@@ -138,10 +138,10 @@ utils::enable_if_t<utils::is_same_v<V, double>, V>
 min(Ty *Address, utils::remove_addrspace_t<Ty> Val,
     atomic::OrderingTy Ordering) {
   if (Val >= 0)
-    return utils::convertViaPun<double>(
-        min((int64_t *)Address, utils::convertViaPun<int64_t>(Val), Ordering));
-  return utils::convertViaPun<double>(
-      max((uint64_t *)Address, utils::convertViaPun<uint64_t>(Val), Ordering));
+    return utils::bitCast<double>(
+        min((int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering));
+  return utils::bitCast<double>(
+      max((uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering));
 }
 
 template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp
index 881bd12f03405..8583a539824c8 100644
--- a/offload/DeviceRTL/src/Mapping.cpp
+++ b/offload/DeviceRTL/src/Mapping.cpp
@@ -371,8 +371,8 @@ int ompx_shfl_down_sync_i(uint64_t mask, int var, unsigned delta, int width) {
 
 float ompx_shfl_down_sync_f(uint64_t mask, float var, unsigned delta,
                             int width) {
-  return utils::convertViaPun<float>(utils::shuffleDown(
-      mask, utils::convertViaPun<int32_t>(var), delta, width));
+  return utils::bitCast<float>(
+      utils::shuffleDown(mask, utils::bitCast<int32_t>(var), delta, width));
 }
 
 long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width) {
@@ -381,8 +381,8 @@ long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width) {
 
 double ompx_shfl_down_sync_d(uint64_t mask, double var, unsigned delta,
                              int width) {
-  return utils::convertViaPun<double>(utils::shuffleDown(
-      mask, utils::convertViaPun<int64_t>(var), delta, width));
+  return utils::bitCast<double>(
+      utils::shuffleDown(mask, utils::bitCast<int64_t>(var), delta, width));
 }
 }
 
diff --git a/offload/include/Shared/Utils.h b/offload/include/Shared/Utils.h
index 83a82678312c1..523e6bc505b81 100644
--- a/offload/include/Shared/Utils.h
+++ b/offload/include/Shared/Utils.h
@@ -68,11 +68,6 @@ inline uint32_t popc(uint64_t V) {
   return __builtin_popcountl(V);
 }
 
-template <typename DstTy, typename SrcTy> inline DstTy convertViaPun(SrcTy V) {
-  static_assert(sizeof(DstTy) == sizeof(SrcTy), "Bad conversion");
-  return *((DstTy *)(&V));
-}
-
 } // namespace utils
 
 #endif // OMPTARGET_SHARED_UTILS_H

From 3055e86c719c6cc72e50bb74a0e3b9cffebb41b5 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson@google.com>
Date: Thu, 9 Jan 2025 12:01:43 -0800
Subject: [PATCH 51/79] [MemProf] Disable cloning of callsites in recursive
 cycles by default (#122354)

This disables the support added in PR121985 by default while we
investigate a compile time crash.
---
 .../IPO/MemProfContextDisambiguation.cpp      |  4 ++--
 llvm/test/ThinLTO/X86/memprof-recursive.ll    | 19 ++++++++++++++++++-
 .../MemProfContextDisambiguation/recursive.ll | 14 +++++++++++++-
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 016db55c99c3e..3d0e5cb5f97b4 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -122,9 +122,9 @@ static cl::opt<unsigned>
                         cl::desc("Max depth to recursively search for missing "
                                  "frames through tail calls."));
 
-// By default enable cloning of callsites involved with recursive cycles
+// Optionally enable cloning of callsites involved with recursive cycles
 static cl::opt<bool> AllowRecursiveCallsites(
-    "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden,
+    "memprof-allow-recursive-callsites", cl::init(false), cl::Hidden,
     cl::desc("Allow cloning of callsites involved in recursive cycles"));
 
 // When disabled, try to detect and prevent cloning of recursive contexts.
diff --git a/llvm/test/ThinLTO/X86/memprof-recursive.ll b/llvm/test/ThinLTO/X86/memprof-recursive.ll
index 2b1d7081b7610..1f4f75460b070 100644
--- a/llvm/test/ThinLTO/X86/memprof-recursive.ll
+++ b/llvm/test/ThinLTO/X86/memprof-recursive.ll
@@ -5,7 +5,7 @@
 
 ; RUN: opt -thinlto-bc %s >%t.o
 
-;; By default we should enable cloning of contexts involved with recursive
+;; Check behavior when we enable cloning of contexts involved with recursive
 ;; cycles, but not through the cycle itself. I.e. until full support for
 ;; recursion is added, the cloned recursive call from C back to B (line 12) will
 ;; not be updated to call a clone.
@@ -18,6 +18,7 @@
 ; RUN:  -r=%t.o,_Znam, \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes \
 ; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:	-memprof-allow-recursive-callsites=true \
 ; RUN:  -o %t.out 2>&1 | FileCheck %s \
 ; RUN:  --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
 ; RUN:  --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS
@@ -38,6 +39,21 @@
 ; RUN:  --implicit-check-not="created clone" \
 ; RUN:	--implicit-check-not="marked with memprof allocation attribute cold"
 
+;; Check the default behavior (disabled recursive callsites).
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
+; RUN:  -r=%t.o,_Z1Dv,plx \
+; RUN:  -r=%t.o,_Z1Ci,plx \
+; RUN:  -r=%t.o,_Z1Bi,plx \
+; RUN:  -r=%t.o,main,plx \
+; RUN:  -r=%t.o,_Znam, \
+; RUN:  -memprof-verify-ccg -memprof-verify-nodes \
+; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:  -o %t.out 2>&1 | FileCheck %s --allow-empty \
+; RUN:  --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
+; RUN:  --implicit-check-not="created clone" \
+; RUN:	--implicit-check-not="marked with memprof allocation attribute cold"
+
 ;; Skipping recursive contexts should prevent spurious call to cloned version of
 ;; B from the context starting at memprof_recursive.cc:19:13, which is actually
 ;; recursive (until that support is added).
@@ -50,6 +66,7 @@
 ; RUN:  -r=%t.o,_Znam, \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes \
 ; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:	-memprof-allow-recursive-callsites=true \
 ; RUN:	-memprof-allow-recursive-contexts=false \
 ; RUN:  -o %t.out 2>&1 | FileCheck %s \
 ; RUN:  --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
index 759d5115896c1..fd87cb535c42e 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll
@@ -34,13 +34,14 @@
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
 
-;; By default we should enable cloning of contexts involved with recursive
+;; Check behavior when we enable cloning of contexts involved with recursive
 ;; cycles, but not through the cycle itself. I.e. until full support for
 ;; recursion is added, the cloned recursive call from C back to B (line 12) will
 ;; not be updated to call a clone.
 ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes \
 ; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:	-memprof-allow-recursive-callsites=true \
 ; RUN:  %s -S 2>&1 | FileCheck %s \
 ; RUN:  --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
 ; RUN:  --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS
@@ -56,12 +57,23 @@
 ; RUN:	--implicit-check-not="marked with memprof allocation attribute cold" \
 ; RUN:  --check-prefix=ALL
 
+;; Check the default behavior (disabled recursive callsites).
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:  -memprof-verify-ccg -memprof-verify-nodes \
+; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:  %s -S 2>&1 | FileCheck %s \
+; RUN:  --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
+; RUN:  --implicit-check-not="created clone" \
+; RUN:	--implicit-check-not="marked with memprof allocation attribute cold" \
+; RUN:  --check-prefix=ALL
+
 ;; Skipping recursive contexts should prevent spurious call to cloned version of
 ;; B from the context starting at memprof_recursive.cc:19:13, which is actually
 ;; recursive (until that support is added).
 ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes \
 ; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:	-memprof-allow-recursive-callsites=true \
 ; RUN:	-memprof-allow-recursive-contexts=false \
 ; RUN:  %s -S 2>&1 | FileCheck %s \
 ; RUN:  --implicit-check-not "memprof_recursive3.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \

From 218f15cd1eee22933e505d7093ec2fe38a36ef3b Mon Sep 17 00:00:00 2001
From: Chris B <chris.bieneman@me.com>
Date: Thu, 9 Jan 2025 14:10:44 -0600
Subject: [PATCH 52/79] Add pre-merge workflow for HLSL testing (#122184)

This adds a workflow for running HLSL tests on PRs that modify HLSL and
DirectX code.

The tests enabled here are the LLVM & Clang tests and the Offload
execution tests: https://github.com/llvm-beanz/offload-test-suite/
---
 .github/workflows/hlsl-matrix.yaml   | 30 ++++++++++
 .github/workflows/hlsl-test-all.yaml | 87 ++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 .github/workflows/hlsl-matrix.yaml
 create mode 100644 .github/workflows/hlsl-test-all.yaml

diff --git a/.github/workflows/hlsl-matrix.yaml b/.github/workflows/hlsl-matrix.yaml
new file mode 100644
index 0000000000000..c63a32acd2b3e
--- /dev/null
+++ b/.github/workflows/hlsl-matrix.yaml
@@ -0,0 +1,30 @@
+name: HLSL Tests
+
+permissions:
+  contents: read
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - llvm/**/DirectX/**
+      - .github/workflows/hlsl*
+      - clang/*HLSL*/**/*
+      - clang/**/*HLSL*
+      - llvm/**/Frontend/HLSL/**/*
+
+jobs:
+  HLSL-Tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        runs-on:
+          - hlsl-macos
+
+    uses: ./.github/workflows/hlsl-test-all.yaml
+    with:
+      SKU: hlsl-macos
+      TestTarget: check-hlsl-clang-mtl # TODO: This target changes based on SKU
+      LLVM-ref: ${{ github.ref }}
diff --git a/.github/workflows/hlsl-test-all.yaml b/.github/workflows/hlsl-test-all.yaml
new file mode 100644
index 0000000000000..93a1c6d2662d4
--- /dev/null
+++ b/.github/workflows/hlsl-test-all.yaml
@@ -0,0 +1,87 @@
+name: HLSL Test
+
+permissions:
+  contents: read
+
+on:
+  workflow_call:
+    inputs:
+      OffloadTest-branch:
+        description: 'Test Suite Branch'
+        required: false
+        default: 'main'
+        type: string
+      LLVM-ref:
+        description: 'LLVM Branch'
+        required: false
+        default: 'main'
+        type: string
+      SKU:
+        required: true
+        type: string
+      TestTarget:
+        required: false
+        default: 'check-hlsl'
+        type: string
+
+jobs:
+  build:
+    runs-on: ${{ inputs.SKU }}
+    steps:
+      - name: Checkout DXC
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+        with:
+          repository: Microsoft/DirectXShaderCompiler
+          ref: main
+          path: DXC
+          submodules: true
+      - name: Checkout LLVM
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+        with:
+          ref: ${{ inputs.LLVM-branch }}
+          path: llvm-project
+      - name: Checkout OffloadTest
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+        with:
+          repository: llvm-beanz/offload-test-suite
+          ref: main
+          path: OffloadTest
+      - name: Checkout Golden Images
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+        with:
+          repository: llvm-beanz/offload-golden-images
+          ref: main
+          path: golden-images
+      - name: Setup Windows
+        if: runner.os == 'Windows'
+        uses: llvm/actions/setup-windows@main
+        with:
+          arch: amd64
+      - name: Build DXC
+        run: |
+            cd DXC
+            mkdir build
+            cd build
+            cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -C ${{ github.workspace }}/DXC/cmake/caches/PredefinedParams.cmake -C ${{ github.workspace }}/OffloadTest/cmake/caches/sccache.cmake -DHLSL_DISABLE_SOURCE_GENERATION=On ${{ github.workspace }}/DXC/
+            ninja dxv llvm-dis
+      - name: Build LLVM
+        run: |
+            cd llvm-project
+            mkdir build
+            cd build
+            cmake -G Ninja -DDXIL_DIS=${{ github.workspace }}/DXC/build/bin/llvm-dis -DLLVM_INCLUDE_DXIL_TESTS=On -DCMAKE_BUILD_TYPE=Release -C ${{ github.workspace }}/llvm-project/clang/cmake/caches/HLSL.cmake -C ${{ github.workspace }}/OffloadTest/cmake/caches/sccache.cmake -DDXC_DIR=${{ github.workspace }}/DXC/build/bin -DLLVM_EXTERNAL_OFFLOADTEST_SOURCE_DIR=${{ github.workspace }}/OffloadTest -DLLVM_EXTERNAL_PROJECTS="OffloadTest" -DLLVM_LIT_ARGS="--xunit-xml-output=testresults.xunit.xml -v" -DGOLDENIMAGE_DIR=${{ github.workspace }}/golden-images ${{ github.workspace }}/llvm-project/llvm/
+            ninja hlsl-test-depends llvm-test-depends clang-test-depends
+      - name: Run HLSL Tests
+        run: |
+            cd llvm-project
+            cd build
+            ninja check-llvm
+            ninja check-clang
+            ninja check-hlsl-unit
+            ninja ${{ inputs.TestTarget }}
+      - name: Publish Test Results
+        uses: EnricoMi/publish-unit-test-result-action/macos@170bf24d20d201b842d7a52403b73ed297e6645b # v2
+        if: always() && runner.os == 'macOS'
+        with:
+          comment_mode: off
+          files: llvm-project/build/**/testresults.xunit.xml

From 9d5299eb61a64cd4df5fefa0299b0cf8d917978f Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra@codasip.com>
Date: Thu, 9 Jan 2025 20:18:34 +0000
Subject: [PATCH 53/79] VT/test: pre-commit tests to enable samesign optz
 (#120257)

Pre-commit some tests in preparation to teach ValueTracking's
implied-cond about samesign.
---
 .../implied-condition-samesign.ll             | 286 ++++++++++++++++++
 1 file changed, 286 insertions(+)
 create mode 100644 llvm/test/Analysis/ValueTracking/implied-condition-samesign.ll

diff --git a/llvm/test/Analysis/ValueTracking/implied-condition-samesign.ll b/llvm/test/Analysis/ValueTracking/implied-condition-samesign.ll
new file mode 100644
index 0000000000000..0ea69c2cd4d47
--- /dev/null
+++ b/llvm/test/Analysis/ValueTracking/implied-condition-samesign.ll
@@ -0,0 +1,286 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=instsimplify -S %s | FileCheck %s
+
+define i1 @incr_sle(i32 %i, i32 %len) {
+; CHECK-LABEL: define i1 @incr_sle(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:    [[I_INCR:%.*]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I_GT_LEN:%.*]] = icmp samesign ugt i32 [[I]], [[LEN]]
+; CHECK-NEXT:    [[I_INCR_SGT_LEN:%.*]] = icmp sgt i32 [[I_INCR]], [[LEN]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp sle i1 [[I_INCR_SGT_LEN]], [[I_GT_LEN]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %i.incr = add nsw nuw i32 %i, 1
+  %i.gt.len = icmp samesign ugt i32 %i, %len
+  %i.incr.sgt.len = icmp sgt i32 %i.incr, %len
+  %res = icmp sle i1 %i.incr.sgt.len, %i.gt.len
+  ret i1 %res
+}
+
+define i1 @incr_sle_no_nsw_nuw(i32 %i, i32 %len) {
+; CHECK-LABEL: define i1 @incr_sle_no_nsw_nuw(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:    [[I_INCR:%.*]] = add i32 [[I]], 1
+; CHECK-NEXT:    [[I_GT_LEN:%.*]] = icmp samesign ugt i32 [[I]], [[LEN]]
+; CHECK-NEXT:    [[I_INCR_SGT_LEN:%.*]] = icmp sgt i32 [[I_INCR]], [[LEN]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp sle i1 [[I_INCR_SGT_LEN]], [[I_GT_LEN]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %i.incr = add i32 %i, 1
+  %i.gt.len = icmp samesign ugt i32 %i, %len
+  %i.incr.sgt.len = icmp sgt i32 %i.incr, %len
+  %res = icmp sle i1 %i.incr.sgt.len, %i.gt.len
+  ret i1 %res
+}
+
+define i1 @incr_sge(i32 %i, i32 %len) {
+; CHECK-LABEL: define i1 @incr_sge(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:    [[I_INCR:%.*]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I_LT_LEN:%.*]] = icmp samesign ult i32 [[I]], [[LEN]]
+; CHECK-NEXT:    [[I_INCR_SLT_LEN:%.*]] = icmp slt i32 [[I_INCR]], [[LEN]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp sge i1 [[I_INCR_SLT_LEN]], [[I_LT_LEN]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %i.incr = add nsw nuw i32 %i, 1
+  %i.lt.len = icmp samesign ult i32 %i, %len
+  %i.incr.slt.len = icmp slt i32 %i.incr, %len
+  %res = icmp sge i1 %i.incr.slt.len, %i.lt.len
+  ret i1 %res
+}
+
+define i1 @incr_sge_no_nsw_nuw(i32 %i, i32 %len) {
+; CHECK-LABEL: define i1 @incr_sge_no_nsw_nuw(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:    [[I_INCR:%.*]] = add i32 [[I]], 1
+; CHECK-NEXT:    [[I_LT_LEN:%.*]] = icmp samesign ult i32 [[I]], [[LEN]]
+; CHECK-NEXT:    [[I_INCR_SLT_LEN:%.*]] = icmp slt i32 [[I_INCR]], [[LEN]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp sge i1 [[I_INCR_SLT_LEN]], [[I_LT_LEN]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %i.incr = add i32 %i, 1
+  %i.lt.len = icmp samesign ult i32 %i, %len
+  %i.incr.slt.len = icmp slt i32 %i.incr, %len
+  %res = icmp sge i1 %i.incr.slt.len, %i.lt.len
+  ret i1 %res
+}
+
+define i1 @incr_ule(i32 %i, i32 %len) {
+; CHECK-LABEL: define i1 @incr_ule(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:    [[I_INCR:%.*]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I_GT_LEN:%.*]] = icmp samesign ugt i32 [[I]], [[LEN]]
+; CHECK-NEXT:    [[I_INCR_SGT_LEN:%.*]] = icmp sgt i32 [[I_INCR]], [[LEN]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp ule i1 [[I_GT_LEN]], [[I_INCR_SGT_LEN]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %i.incr = add nsw nuw i32 %i, 1
+  %i.gt.len = icmp samesign ugt i32 %i, %len
+  %i.incr.sgt.len = icmp sgt i32 %i.incr, %len
+  %res = icmp ule i1 %i.gt.len, %i.incr.sgt.len
+  ret i1 %res
+}
+
+define i1 @incr_ule_no_nsw_nuw(i32 %i, i32 %len) {
+; CHECK-LABEL: define i1 @incr_ule_no_nsw_nuw(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:    [[I_INCR:%.*]] = add i32 [[I]], 1
+; CHECK-NEXT:    [[I_GT_LEN:%.*]] = icmp samesign ugt i32 [[I]], [[LEN]]
+; CHECK-NEXT:    [[I_INCR_SGT_LEN:%.*]] = icmp sgt i32 [[I_INCR]], [[LEN]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp ule i1 [[I_GT_LEN]], [[I_INCR_SGT_LEN]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %i.incr = add i32 %i, 1
+  %i.gt.len = icmp samesign ugt i32 %i, %len
+  %i.incr.sgt.len = icmp sgt i32 %i.incr, %len
+  %res = icmp ule i1 %i.gt.len, %i.incr.sgt.len
+  ret i1 %res
+}
+
+define i1 @incr_uge(i32 %i, i32 %len) {
+; CHECK-LABEL: define i1 @incr_uge(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:    [[I_INCR:%.*]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I_LT_LEN:%.*]] = icmp samesign ult i32 [[I]], [[LEN]]
+; CHECK-NEXT:    [[I_INCR_SLT_LEN:%.*]] = icmp slt i32 [[I_INCR]], [[LEN]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp uge i1 [[I_LT_LEN]], [[I_INCR_SLT_LEN]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %i.incr = add nsw nuw i32 %i, 1
+  %i.lt.len = icmp samesign ult i32 %i, %len
+  %i.incr.slt.len = icmp slt i32 %i.incr, %len
+  %res = icmp uge i1 %i.lt.len, %i.incr.slt.len
+  ret i1 %res
+}
+
+define i1 @incr_uge_no_nsw_nuw(i32 %i, i32 %len) {
+; CHECK-LABEL: define i1 @incr_uge_no_nsw_nuw(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:    [[I_INCR:%.*]] = add i32 [[I]], 1
+; CHECK-NEXT:    [[I_LT_LEN:%.*]] = icmp samesign ult i32 [[I]], [[LEN]]
+; CHECK-NEXT:    [[I_INCR_SLT_LEN:%.*]] = icmp slt i32 [[I_INCR]], [[LEN]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp uge i1 [[I_LT_LEN]], [[I_INCR_SLT_LEN]]
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %i.incr = add i32 %i, 1
+  %i.lt.len = icmp samesign ult i32 %i, %len
+  %i.incr.slt.len = icmp slt i32 %i.incr, %len
+  %res = icmp uge i1 %i.lt.len, %i.incr.slt.len
+  ret i1 %res
+}
+
+define i1 @sgt_implies_ge_via_assume(i32 %i, i32 %j) {
+; CHECK-LABEL: define i1 @sgt_implies_ge_via_assume(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]]) {
+; CHECK-NEXT:    [[I_SGT_J:%.*]] = icmp sgt i32 [[I]], [[J]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[I_SGT_J]])
+; CHECK-NEXT:    [[I_GE_J:%.*]] = icmp samesign uge i32 [[I]], [[J]]
+; CHECK-NEXT:    ret i1 [[I_GE_J]]
+;
+  %i.sgt.j = icmp sgt i32 %i, %j
+  call void @llvm.assume(i1 %i.sgt.j)
+  %i.ge.j = icmp samesign uge i32 %i, %j
+  ret i1 %i.ge.j
+}
+
+define i32 @gt_implies_sge_dominating(i32 %a, i32 %len) {
+; CHECK-LABEL: define i32 @gt_implies_sge_dominating(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A_GT_LEN:%.*]] = icmp samesign ugt i32 [[A]], [[LEN]]
+; CHECK-NEXT:    br i1 [[A_GT_LEN]], label %[[TAKEN:.*]], label %[[END:.*]]
+; CHECK:       [[TAKEN]]:
+; CHECK-NEXT:    [[A_SGE_LEN:%.*]] = icmp sge i32 [[A]], [[LEN]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[A_SGE_LEN]], i32 30, i32 0
+; CHECK-NEXT:    ret i32 [[RES]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i32 -1
+;
+entry:
+  %a.gt.len = icmp samesign ugt i32 %a, %len
+  br i1 %a.gt.len, label %taken, label %end
+
+taken:
+  %a.sge.len = icmp sge i32 %a, %len
+  %res = select i1 %a.sge.len, i32 30, i32 0
+  ret i32 %res
+
+end:
+  ret i32 -1
+}
+
+define i32 @gt_implies_sge_dominating_cr(i32 %a, i32 %len) {
+; CHECK-LABEL: define i32 @gt_implies_sge_dominating_cr(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A_GT_20:%.*]] = icmp samesign ugt i32 [[A]], 20
+; CHECK-NEXT:    br i1 [[A_GT_20]], label %[[TAKEN:.*]], label %[[END:.*]]
+; CHECK:       [[TAKEN]]:
+; CHECK-NEXT:    [[A_SGE_10:%.*]] = icmp sge i32 [[A]], 10
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[A_SGE_10]], i32 30, i32 0
+; CHECK-NEXT:    ret i32 [[RES]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i32 -1
+;
+entry:
+  %a.gt.20 = icmp samesign ugt i32 %a, 20
+  br i1 %a.gt.20, label %taken, label %end
+
+taken:
+  %a.sge.10 = icmp sge i32 %a, 10
+  %res = select i1 %a.sge.10, i32 30, i32 0
+  ret i32 %res
+
+end:
+  ret i32 -1
+}
+
+define i32 @sgt_implies_ge_dominating_cr(i32 %a, i32 %len) {
+; CHECK-LABEL: define i32 @sgt_implies_ge_dominating_cr(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[LEN:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A_SGT_MINUS_10:%.*]] = icmp sgt i32 [[A]], -10
+; CHECK-NEXT:    br i1 [[A_SGT_MINUS_10]], label %[[TAKEN:.*]], label %[[END:.*]]
+; CHECK:       [[TAKEN]]:
+; CHECK-NEXT:    [[A_GE_MINUS_20:%.*]] = icmp samesign uge i32 [[A]], -20
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[A_GE_MINUS_20]], i32 30, i32 0
+; CHECK-NEXT:    ret i32 [[RES]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i32 -1
+;
+entry:
+  %a.sgt.minus.10 = icmp sgt i32 %a, -10
+  br i1 %a.sgt.minus.10, label %taken, label %end
+
+taken:
+  %a.ge.minus.20 = icmp samesign uge i32 %a, -20
+  %res = select i1 %a.ge.minus.20, i32 30, i32 0
+  ret i32 %res
+
+end:
+  ret i32 -1
+}
+
+define i32 @gt_sub_nsw(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @gt_sub_nsw(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[X_GT_Y:%.*]] = icmp samesign ugt i32 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[X_GT_Y]], label %[[TAKEN:.*]], label %[[END:.*]]
+; CHECK:       [[TAKEN]]:
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[SUB]], 1
+; CHECK-NEXT:    [[NEG:%.*]] = xor i32 [[SUB]], -1
+; CHECK-NEXT:    [[ABSCOND:%.*]] = icmp samesign ult i32 [[SUB]], -1
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[ABSCOND]], i32 [[NEG]], i32 [[ADD]]
+; CHECK-NEXT:    ret i32 [[ABS]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %x.gt.y = icmp samesign ugt i32 %x, %y
+  br i1 %x.gt.y, label %taken, label %end
+
+taken:
+  %sub = sub nsw i32 %x, %y
+  %add = add nsw i32 %sub, 1
+  %neg = xor i32 %sub, -1
+  %abscond = icmp samesign ult i32 %sub, -1
+  %abs = select i1 %abscond, i32 %neg, i32 %add
+  ret i32 %abs
+
+end:
+  ret i32 0
+}
+
+define i32 @ge_sub_nsw(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @ge_sub_nsw(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[X_GE_Y:%.*]] = icmp samesign uge i32 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[X_GE_Y]], label %[[TAKEN:.*]], label %[[END:.*]]
+; CHECK:       [[TAKEN]]:
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[SUB]], 1
+; CHECK-NEXT:    [[NEG:%.*]] = xor i32 [[SUB]], -1
+; CHECK-NEXT:    [[ABSCOND:%.*]] = icmp samesign ult i32 [[SUB]], -1
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[ABSCOND]], i32 [[NEG]], i32 [[ADD]]
+; CHECK-NEXT:    ret i32 [[ABS]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %x.ge.y = icmp samesign uge i32 %x, %y
+  br i1 %x.ge.y, label %taken, label %end
+
+taken:
+  %sub = sub nsw i32 %x, %y
+  %add = add nsw i32 %sub, 1
+  %neg = xor i32 %sub, -1
+  %abscond = icmp samesign ult i32 %sub, -1
+  %abs = select i1 %abscond, i32 %neg, i32 %add
+  ret i32 %abs
+
+end:
+  ret i32 0
+}

From f791a4f19f6c99feccfe59e0724d9215bee985cf Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 9 Jan 2025 21:38:29 +0100
Subject: [PATCH 54/79] [bazel] Add missing dependency for
 cbcb7ad32e6faca1f4c0f2f436e6076774104e17

---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index e823af2f14712..274cec0c187a4 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -10571,6 +10571,7 @@ cc_library(
         ":OpenACCTypeInterfacesIncGen",
         ":OpenACCTypesIncGen",
         ":SideEffectInterfaces",
+        ":Support",
         ":TransformUtils",
         "//llvm:Support",
     ],

From d797d94185cfb3eadaef6103b0d2b2f312e4f432 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 9 Jan 2025 21:39:14 +0100
Subject: [PATCH 55/79] [bazel] Port 0aa831e0edb1c1deabb96ce2435667cc82bac79b

---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel      | 1 +
 utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 274cec0c187a4..5c2a77ca67fd4 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5717,6 +5717,7 @@ cc_library(
         ":SCFDialect",
         ":SideEffectInterfaces",
         ":Support",
+        ":ValueBoundsOpInterface",
         "//llvm:Core",
         "//llvm:Support",
     ],
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 7d51a3829e912..873fb2d18bfb2 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -548,6 +548,7 @@ cc_library(
         "//llvm:Support",
         "//mlir:ConvertToSPIRV",
         "//mlir:FuncDialect",
+        "//mlir:GPUDialect",
         "//mlir:GPUToSPIRV",
         "//mlir:GPUTransforms",
         "//mlir:IR",
@@ -695,6 +696,7 @@ cc_library(
         "//mlir:ArithTransforms",
         "//mlir:DialectUtils",
         "//mlir:FuncDialect",
+        "//mlir:FunctionInterfaces",
         "//mlir:IR",
         "//mlir:MemRefDialect",
         "//mlir:Pass",

From 8ea8e7f52908a831ab44ffca1e0c8fe207a409c8 Mon Sep 17 00:00:00 2001
From: Tom Honermann <tom.honermann@intel.com>
Date: Thu, 9 Jan 2025 15:42:29 -0500
Subject: [PATCH 56/79] [SYCL] Basic diagnostics for the
 sycl_kernel_entry_point attribute. (#120327)

The `sycl_kernel_entry_point` attribute is used to declare a function that
defines a pattern for an offload kernel entry point. The attribute requires
a single type argument that specifies a class type that meets the requirements
for a SYCL kernel name as described in section 5.2, "Naming of kernels", of
the SYCL 2020 specification. A unique kernel name type is required for each
function declared with the attribute. The attribute may not first appear on a
declaration that follows a definition of the function. The function is
required to have a non-deduced `void` return type. The function must not be
a non-static member function, be deleted or defaulted, be declared with the
`constexpr` or `consteval` specifiers, be declared with the `[[noreturn]]`
attribute, be a coroutine, or accept variadic arguments.

Diagnostics are not yet provided for the following:
- Use of a type as a kernel name that does not satisfy the forward
  declarability requirements specified in section 5.2, "Naming of kernels",
  of the SYCL 2020 specification.
- Use of a type as a parameter of the attributed function that does not
  satisfy the kernel parameter requirements specified in section 4.12.4,
  "Rules for parameter passing to kernels", of the SYCL 2020 specification
  (each such function parameter constitutes a kernel parameter).
- Use of language features that are not permitted in device functions as
  specified in section 5.4, "Language restrictions for device functions",
  of the SYCL 2020 specification.

There are several issues noted by various FIXME comments.
- The diagnostic generated for kernel name conflicts needs additional work
  to better detail the relevant source locations; such as the location of
  each declaration as well as the original source of each kernel name.
- A number of the tests illustrate spurious errors being produced due to
  attributes that appertain to function templates being instantiated too
  early (during overload resolution as opposed to after an overload is
  selected).

Included changes allow the `SYCLKernelEntryPointAttr` attribute to be
marked as invalid if a `sycl_kernel_entry_point` attribute is used incorrectly.
This is intended to prevent trying to emit an offload kernel entry point
without having to mark the associated function as invalid since doing so
would affect overload resolution; which this attribute should not do.
Unfortunately, Clang eagerly instantiates attributes that appertain to
functions with the result that errors might be issued for function
declarations that are never selected by overload resolution. Tests have
been added to demonstrate this. Further work will be needed to address
these issues (for this and other attributes).
---
 clang/include/clang/AST/ASTContext.h          |  10 +
 clang/include/clang/Basic/Attr.td             |  13 +-
 clang/include/clang/Basic/AttrDocs.td         |   2 +-
 clang/include/clang/Basic/DiagnosticGroups.td |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td        |  27 ++
 clang/include/clang/Sema/SemaSYCL.h           |   2 +
 clang/lib/AST/ASTContext.cpp                  |  13 +
 clang/lib/Sema/SemaDecl.cpp                   |  36 +-
 clang/lib/Sema/SemaLambda.cpp                 |   5 +
 clang/lib/Sema/SemaSYCL.cpp                   | 156 ++++++++
 clang/lib/Serialization/ASTReaderDecl.cpp     |  13 +-
 .../ast-dump-sycl-kernel-entry-point.cpp      |  23 +-
 ...-kernel-entry-point-attr-appertainment.cpp | 352 ++++++++++++++++++
 .../sycl-kernel-entry-point-attr-grammar.cpp  |  15 -
 ...el-entry-point-attr-kernel-name-module.cpp | 104 ++++++
 ...ernel-entry-point-attr-kernel-name-pch.cpp |  36 ++
 ...cl-kernel-entry-point-attr-kernel-name.cpp | 118 ++++++
 .../sycl-kernel-entry-point-attr-sfinae.cpp   |  65 ++++
 18 files changed, 966 insertions(+), 25 deletions(-)
 create mode 100644 clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
 create mode 100644 clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp
 create mode 100644 clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp
 create mode 100644 clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp
 create mode 100644 clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 1e89a6805ce9c..0e07c5d6ce8fb 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -3360,6 +3360,16 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// this function.
   void registerSYCLEntryPointFunction(FunctionDecl *FD);
 
+  /// Given a type used as a SYCL kernel name, returns a reference to the
+  /// metadata generated from the corresponding SYCL kernel entry point.
+  /// Aborts if the provided type is not a registered SYCL kernel name.
+  const SYCLKernelInfo &getSYCLKernelInfo(QualType T) const;
+
+  /// Returns a pointer to the metadata generated from the corresponding
+  /// SYCLkernel entry point if the provided type corresponds to a registered
+  /// SYCL kernel name. Returns a null pointer otherwise.
+  const SYCLKernelInfo *findSYCLKernelInfo(QualType T) const;
+
   //===--------------------------------------------------------------------===//
   //                    Statistics
   //===--------------------------------------------------------------------===//
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index e51a74655dd5e..5039c20d8b73b 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1516,11 +1516,22 @@ def SYCLKernel : InheritableAttr {
 
 def SYCLKernelEntryPoint : InheritableAttr {
   let Spellings = [Clang<"sycl_kernel_entry_point">];
-  let Args = [TypeArgument<"KernelName">];
+  let Args = [
+    // KernelName is required and specifies the kernel name type.
+    TypeArgument<"KernelName">,
+    // InvalidAttr is a fake argument used to track whether the
+    // semantic requirements of the attribute have been satisified.
+    // A fake argument is used to enable serialization support.
+    DefaultBoolArgument<"Invalid", /*default=*/0, /*fake=*/1>
+  ];
   let Subjects = SubjectList<[Function], ErrorDiag>;
   let TemplateDependent = 1;
   let LangOpts = [SYCLHost, SYCLDevice];
   let Documentation = [SYCLKernelEntryPointDocs];
+  let AdditionalMembers = [{
+    void setInvalidAttr() { invalid = true; }
+    bool isInvalidAttr() const { return invalid; }
+  }];
 }
 
 def SYCLSpecialClass: InheritableAttr {
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index b8d702e41aa0b..506fe38eb882b 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -475,7 +475,7 @@ not first appear on a declaration that follows a definition of the function.
 The attribute only appertains to functions and only those that meet the
 following requirements.
 
-* Has a ``void`` return type.
+* Has a non-deduced ``void`` return type.
 * Is not a non-static member function, constructor, or destructor.
 * Is not a C variadic function.
 * Is not a coroutine.
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 3ac490d30371b..594e99a19b64d 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -648,6 +648,7 @@ def PoundPragmaMessage : DiagGroup<"#pragma-messages">,
 def : DiagGroup<"redundant-decls">;
 def RedeclaredClassMember : DiagGroup<"redeclared-class-member">;
 def GNURedeclaredEnum : DiagGroup<"gnu-redeclared-enum">;
+def RedundantAttribute : DiagGroup<"redundant-attribute">;
 def RedundantMove : DiagGroup<"redundant-move">;
 def Register : DiagGroup<"register", [DeprecatedRegister]>;
 def ReturnTypeCLinkage : DiagGroup<"return-type-c-linkage">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index ab2d6237c1cab..d4e897868f1a9 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12429,6 +12429,33 @@ def err_sycl_special_type_num_init_method : Error<
   "types with 'sycl_special_class' attribute must have one and only one '__init' "
   "method defined">;
 
+// SYCL kernel entry point diagnostics
+def err_sycl_entry_point_invalid : Error<
+  "'sycl_kernel_entry_point' attribute cannot be applied to a"
+  " %select{non-static member function|variadic function|deleted function|"
+           "defaulted function|constexpr function|consteval function|"
+           "function declared with the 'noreturn' attribute|coroutine}0">;
+def err_sycl_entry_point_invalid_redeclaration : Error<
+  "'sycl_kernel_entry_point' kernel name argument does not match prior"
+  " declaration%diff{: $ vs $|}0,1">;
+def err_sycl_kernel_name_conflict : Error<
+  "'sycl_kernel_entry_point' kernel name argument conflicts with a previous"
+  " declaration">;
+def warn_sycl_kernel_name_not_a_class_type : Warning<
+  "%0 is not a valid SYCL kernel name type; a non-union class type is required">,
+  InGroup<DiagGroup<"nonportable-sycl">>, DefaultError;
+def warn_sycl_entry_point_redundant_declaration : Warning<
+  "redundant 'sycl_kernel_entry_point' attribute">, InGroup<RedundantAttribute>;
+def err_sycl_entry_point_after_definition : Error<
+  "'sycl_kernel_entry_point' attribute cannot be added to a function after the"
+  " function is defined">;
+def err_sycl_entry_point_return_type : Error<
+  "'sycl_kernel_entry_point' attribute only applies to functions with a"
+  " 'void' return type">;
+def err_sycl_entry_point_deduced_return_type : Error<
+  "'sycl_kernel_entry_point' attribute only applies to functions with a"
+  " non-deduced 'void' return type">;
+
 def warn_cuda_maxclusterrank_sm_90 : Warning<
   "maxclusterrank requires sm_90 or higher, CUDA arch provided: %0, ignoring "
   "%1 attribute">, InGroup<IgnoredAttributes>;
diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h
index c9f3358124eda..5bb0de40c886c 100644
--- a/clang/include/clang/Sema/SemaSYCL.h
+++ b/clang/include/clang/Sema/SemaSYCL.h
@@ -63,6 +63,8 @@ class SemaSYCL : public SemaBase {
 
   void handleKernelAttr(Decl *D, const ParsedAttr &AL);
   void handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL);
+
+  void CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD);
 };
 
 } // namespace clang
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index b10513f49a8d1..8f04b58841964 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -14502,6 +14502,19 @@ void ASTContext::registerSYCLEntryPointFunction(FunctionDecl *FD) {
       std::make_pair(KernelNameType, BuildSYCLKernelInfo(KernelNameType, FD)));
 }
 
+const SYCLKernelInfo &ASTContext::getSYCLKernelInfo(QualType T) const {
+  CanQualType KernelNameType = getCanonicalType(T);
+  return SYCLKernels.at(KernelNameType);
+}
+
+const SYCLKernelInfo *ASTContext::findSYCLKernelInfo(QualType T) const {
+  CanQualType KernelNameType = getCanonicalType(T);
+  auto IT = SYCLKernels.find(KernelNameType);
+  if (IT != SYCLKernels.end())
+    return &IT->second;
+  return nullptr;
+}
+
 OMPTraitInfo &ASTContext::getNewOMPTraitInfo() {
   OMPTraitInfoVector.emplace_back(new OMPTraitInfo());
   return *OMPTraitInfoVector.back();
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 4001c4d263f1d..75920052c4f0c 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -52,6 +52,7 @@
 #include "clang/Sema/SemaOpenMP.h"
 #include "clang/Sema/SemaPPC.h"
 #include "clang/Sema/SemaRISCV.h"
+#include "clang/Sema/SemaSYCL.h"
 #include "clang/Sema/SemaSwift.h"
 #include "clang/Sema/SemaWasm.h"
 #include "clang/Sema/Template.h"
@@ -2923,7 +2924,7 @@ static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) {
 
   AttrVec &NewAttributes = New->getAttrs();
   for (unsigned I = 0, E = NewAttributes.size(); I != E;) {
-    const Attr *NewAttribute = NewAttributes[I];
+    Attr *NewAttribute = NewAttributes[I];
 
     if (isa<AliasAttr>(NewAttribute) || isa<IFuncAttr>(NewAttribute)) {
       if (FunctionDecl *FD = dyn_cast<FunctionDecl>(New)) {
@@ -3018,6 +3019,16 @@ static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) {
       // declarations after definitions.
       ++I;
       continue;
+    } else if (isa<SYCLKernelEntryPointAttr>(NewAttribute)) {
+      // Elevate latent uses of the sycl_kernel_entry_point attribute to an
+      // error since the definition will have already been created without
+      // the semantic effects of the attribute having been applied.
+      S.Diag(NewAttribute->getLocation(),
+             diag::err_sycl_entry_point_after_definition);
+      S.Diag(Def->getLocation(), diag::note_previous_definition);
+      cast<SYCLKernelEntryPointAttr>(NewAttribute)->setInvalidAttr();
+      ++I;
+      continue;
     }
 
     S.Diag(NewAttribute->getLocation(),
@@ -12142,8 +12153,8 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
   if (LangOpts.OpenMP)
     OpenMP().ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(NewFD);
 
-  if (LangOpts.isSYCL() && NewFD->hasAttr<SYCLKernelEntryPointAttr>())
-    getASTContext().registerSYCLEntryPointFunction(NewFD);
+  if (NewFD->hasAttr<SYCLKernelEntryPointAttr>())
+    SYCL().CheckSYCLEntryPointFunctionDecl(NewFD);
 
   // Semantic checking for this function declaration (in isolation).
 
@@ -15975,6 +15986,25 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
       CheckCoroutineWrapper(FD);
   }
 
+  // Diagnose invalid SYCL kernel entry point function declarations.
+  if (FD && !FD->isInvalidDecl() && FD->hasAttr<SYCLKernelEntryPointAttr>()) {
+    SYCLKernelEntryPointAttr *SKEPAttr =
+        FD->getAttr<SYCLKernelEntryPointAttr>();
+    if (FD->isDefaulted()) {
+      Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+          << /*defaulted function*/ 3;
+      SKEPAttr->setInvalidAttr();
+    } else if (FD->isDeleted()) {
+      Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+          << /*deleted function*/ 2;
+      SKEPAttr->setInvalidAttr();
+    } else if (FSI->isCoroutine()) {
+      Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+          << /*coroutine*/ 7;
+      SKEPAttr->setInvalidAttr();
+    }
+  }
+
   {
     // Do not call PopExpressionEvaluationContext() if it is a lambda because
     // one is already popped when finishing the lambda in BuildLambdaExpr().
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index a67c0b2b367d1..f2c3a816b3b5d 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -24,6 +24,7 @@
 #include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/SemaOpenMP.h"
+#include "clang/Sema/SemaSYCL.h"
 #include "clang/Sema/Template.h"
 #include "llvm/ADT/STLExtras.h"
 #include <optional>
@@ -1948,6 +1949,10 @@ ExprResult Sema::BuildCaptureInit(const Capture &Cap,
 
 ExprResult Sema::ActOnLambdaExpr(SourceLocation StartLoc, Stmt *Body) {
   LambdaScopeInfo LSI = *cast<LambdaScopeInfo>(FunctionScopes.back());
+
+  if (LSI.CallOperator->hasAttr<SYCLKernelEntryPointAttr>())
+    SYCL().CheckSYCLEntryPointFunctionDecl(LSI.CallOperator);
+
   ActOnFinishFunctionBody(LSI.CallOperator, Body);
 
   return BuildLambdaExpr(StartLoc, Body->getEndLoc(), &LSI);
diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index d4fddeb01d0fc..ce53990fdcb18 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -10,7 +10,9 @@
 
 #include "clang/Sema/SemaSYCL.h"
 #include "clang/AST/Mangle.h"
+#include "clang/AST/SYCLKernelInfo.h"
 #include "clang/AST/TypeOrdering.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Sema/Attr.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
@@ -206,3 +208,157 @@ void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) {
   D->addAttr(::new (SemaRef.Context)
                  SYCLKernelEntryPointAttr(SemaRef.Context, AL, TSI));
 }
+
+// Given a potentially qualified type, SourceLocationForUserDeclaredType()
+// returns the source location of the canonical declaration of the unqualified
+// desugared user declared type, if any. For non-user declared types, an
+// invalid source location is returned. The intended usage of this function
+// is to identify an appropriate source location, if any, for a
+// "entity declared here" diagnostic note.
+static SourceLocation SourceLocationForUserDeclaredType(QualType QT) {
+  SourceLocation Loc;
+  const Type *T = QT->getUnqualifiedDesugaredType();
+  if (const TagType *TT = dyn_cast<TagType>(T))
+    Loc = TT->getDecl()->getLocation();
+  else if (const ObjCInterfaceType *ObjCIT = dyn_cast<ObjCInterfaceType>(T))
+    Loc = ObjCIT->getDecl()->getLocation();
+  return Loc;
+}
+
+static bool CheckSYCLKernelName(Sema &S, SourceLocation Loc,
+                                QualType KernelName) {
+  assert(!KernelName->isDependentType());
+
+  if (!KernelName->isStructureOrClassType()) {
+    // SYCL 2020 section 5.2, "Naming of kernels", only requires that the
+    // kernel name be a C++ typename. However, the definition of "kernel name"
+    // in the glossary states that a kernel name is a class type. Neither
+    // section explicitly states whether the kernel name type can be
+    // cv-qualified. For now, kernel name types are required to be class types
+    // and that they may be cv-qualified. The following issue requests
+    // clarification from the SYCL WG.
+    //   https://github.com/KhronosGroup/SYCL-Docs/issues/568
+    S.Diag(Loc, diag::warn_sycl_kernel_name_not_a_class_type) << KernelName;
+    SourceLocation DeclTypeLoc = SourceLocationForUserDeclaredType(KernelName);
+    if (DeclTypeLoc.isValid())
+      S.Diag(DeclTypeLoc, diag::note_entity_declared_at) << KernelName;
+    return true;
+  }
+
+  return false;
+}
+
+void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) {
+  // Ensure that all attributes present on the declaration are consistent
+  // and warn about any redundant ones.
+  SYCLKernelEntryPointAttr *SKEPAttr = nullptr;
+  for (auto *SAI : FD->specific_attrs<SYCLKernelEntryPointAttr>()) {
+    if (!SKEPAttr) {
+      SKEPAttr = SAI;
+      continue;
+    }
+    if (!getASTContext().hasSameType(SAI->getKernelName(),
+                                     SKEPAttr->getKernelName())) {
+      Diag(SAI->getLocation(), diag::err_sycl_entry_point_invalid_redeclaration)
+          << SAI->getKernelName() << SKEPAttr->getKernelName();
+      Diag(SKEPAttr->getLocation(), diag::note_previous_attribute);
+      SAI->setInvalidAttr();
+    } else {
+      Diag(SAI->getLocation(),
+           diag::warn_sycl_entry_point_redundant_declaration);
+      Diag(SKEPAttr->getLocation(), diag::note_previous_attribute);
+    }
+  }
+  assert(SKEPAttr && "Missing sycl_kernel_entry_point attribute");
+
+  // Ensure the kernel name type is valid.
+  if (!SKEPAttr->getKernelName()->isDependentType() &&
+      CheckSYCLKernelName(SemaRef, SKEPAttr->getLocation(),
+                          SKEPAttr->getKernelName()))
+    SKEPAttr->setInvalidAttr();
+
+  // Ensure that an attribute present on the previous declaration
+  // matches the one on this declaration.
+  FunctionDecl *PrevFD = FD->getPreviousDecl();
+  if (PrevFD && !PrevFD->isInvalidDecl()) {
+    const auto *PrevSKEPAttr = PrevFD->getAttr<SYCLKernelEntryPointAttr>();
+    if (PrevSKEPAttr && !PrevSKEPAttr->isInvalidAttr()) {
+      if (!getASTContext().hasSameType(SKEPAttr->getKernelName(),
+                                       PrevSKEPAttr->getKernelName())) {
+        Diag(SKEPAttr->getLocation(),
+             diag::err_sycl_entry_point_invalid_redeclaration)
+            << SKEPAttr->getKernelName() << PrevSKEPAttr->getKernelName();
+        Diag(PrevSKEPAttr->getLocation(), diag::note_previous_decl) << PrevFD;
+        SKEPAttr->setInvalidAttr();
+      }
+    }
+  }
+
+  if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
+    if (!MD->isStatic()) {
+      Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+          << /*non-static member function*/ 0;
+      SKEPAttr->setInvalidAttr();
+    }
+  }
+
+  if (FD->isVariadic()) {
+    Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+        << /*variadic function*/ 1;
+    SKEPAttr->setInvalidAttr();
+  }
+
+  if (FD->isDefaulted()) {
+    Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+        << /*defaulted function*/ 3;
+    SKEPAttr->setInvalidAttr();
+  } else if (FD->isDeleted()) {
+    Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+        << /*deleted function*/ 2;
+    SKEPAttr->setInvalidAttr();
+  }
+
+  if (FD->isConsteval()) {
+    Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+        << /*consteval function*/ 5;
+    SKEPAttr->setInvalidAttr();
+  } else if (FD->isConstexpr()) {
+    Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+        << /*constexpr function*/ 4;
+    SKEPAttr->setInvalidAttr();
+  }
+
+  if (FD->isNoReturn()) {
+    Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
+        << /*function declared with the 'noreturn' attribute*/ 6;
+    SKEPAttr->setInvalidAttr();
+  }
+
+  if (FD->getReturnType()->isUndeducedType()) {
+    Diag(SKEPAttr->getLocation(),
+         diag::err_sycl_entry_point_deduced_return_type);
+    SKEPAttr->setInvalidAttr();
+  } else if (!FD->getReturnType()->isDependentType() &&
+             !FD->getReturnType()->isVoidType()) {
+    Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_return_type);
+    SKEPAttr->setInvalidAttr();
+  }
+
+  if (!FD->isInvalidDecl() && !FD->isTemplated() &&
+      !SKEPAttr->isInvalidAttr()) {
+    const SYCLKernelInfo *SKI =
+        getASTContext().findSYCLKernelInfo(SKEPAttr->getKernelName());
+    if (SKI) {
+      if (!declaresSameEntity(FD, SKI->getKernelEntryPointDecl())) {
+        // FIXME: This diagnostic should include the origin of the kernel
+        // FIXME: names; not just the locations of the conflicting declarations.
+        Diag(FD->getLocation(), diag::err_sycl_kernel_name_conflict);
+        Diag(SKI->getKernelEntryPointDecl()->getLocation(),
+             diag::note_previous_declaration);
+        SKEPAttr->setInvalidAttr();
+      }
+    } else {
+      getASTContext().registerSYCLEntryPointFunction(FD);
+    }
+  }
+}
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 8c60e85c93d70..dee5169ae5723 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1134,8 +1134,19 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
   // the presence of a sycl_kernel_entry_point attribute, register it so that
   // associated metadata is recreated.
   if (FD->hasAttr<SYCLKernelEntryPointAttr>()) {
+    auto *SKEPAttr = FD->getAttr<SYCLKernelEntryPointAttr>();
     ASTContext &C = Reader.getContext();
-    C.registerSYCLEntryPointFunction(FD);
+    const SYCLKernelInfo *SKI = C.findSYCLKernelInfo(SKEPAttr->getKernelName());
+    if (SKI) {
+      if (!declaresSameEntity(FD, SKI->getKernelEntryPointDecl())) {
+        Reader.Diag(FD->getLocation(), diag::err_sycl_kernel_name_conflict);
+        Reader.Diag(SKI->getKernelEntryPointDecl()->getLocation(),
+                    diag::note_previous_declaration);
+        SKEPAttr->setInvalidAttr();
+      }
+    } else {
+      C.registerSYCLEntryPointFunction(FD);
+    }
   }
 }
 
diff --git a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp
index c351f3b7d03ea..0189cf0402d3a 100644
--- a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp
+++ b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp
@@ -107,14 +107,29 @@ void skep5<KN<5,2>>(long) {
 // CHECK:      | |-TemplateArgument type 'long'
 // CHECK:      | `-SYCLKernelEntryPointAttr {{.*}} KN<5, 2>
 
+// FIXME: C++23 [temp.expl.spec]p12 states:
+// FIXME:   ... Similarly, attributes appearing in the declaration of a template
+// FIXME:   have no effect on an explicit specialization of that template.
+// FIXME: Clang currently instantiates a function template specialization from
+// FIXME: the function template declaration and links it as a previous
+// FIXME: declaration of an explicit specialization. The instantiated
+// FIXME: declaration includes attributes instantiated from the function
+// FIXME: template declaration. When the instantiated declaration and the
+// FIXME: explicit specialization both specify a sycl_kernel_entry_point
+// FIXME: attribute with different kernel name types, a spurious diagnostic
+// FIXME: is issued. The following test case is incorrectly diagnosed as
+// FIXME: having conflicting kernel name types (KN<5,3> vs the incorrectly
+// FIXME: inherited KN<5,-1>).
+#if 0
 template<>
 [[clang::sycl_kernel_entry_point(KN<5,3>)]]
 void skep5<KN<5,-1>>(long long) {
 }
-// CHECK:      |-FunctionDecl {{.*}} prev {{.*}} skep5 'void (long long)' explicit_specialization
-// CHECK-NEXT: | |-TemplateArgument type 'KN<5, -1>'
-// CHECK:      | |-TemplateArgument type 'long long'
-// CHECK:      | `-SYCLKernelEntryPointAttr {{.*}} KN<5, 3>
+// FIXME-CHECK:      |-FunctionDecl {{.*}} prev {{.*}} skep5 'void (long long)' explicit_specialization
+// FIXME-CHECK-NEXT: | |-TemplateArgument type 'KN<5, -1>'
+// FIXME-CHECK:      | |-TemplateArgument type 'long long'
+// FIXME-CHECK:      | `-SYCLKernelEntryPointAttr {{.*}} KN<5, 3>
+#endif
 
 template void skep5<KN<5,4>>(int);
 // Checks are located with the primary template declaration above.
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
new file mode 100644
index 0000000000000..5b3cf9853173d
--- /dev/null
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
@@ -0,0 +1,352 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++23 -fsyntax-only -fsycl-is-device -verify %s
+
+// These tests validate appertainment for the sycl_kernel_entry_point attribute.
+
+#if __cplusplus >= 202002L
+// Mock coroutine support.
+namespace std {
+
+template<typename Promise = void>
+struct coroutine_handle {
+  template<typename T>
+  coroutine_handle(const coroutine_handle<T>&);
+  static coroutine_handle from_address(void *addr);
+};
+
+template<typename R, typename... Args>
+struct coroutine_traits {
+  struct suspend_never {
+    bool await_ready() const noexcept;
+    void await_suspend(std::coroutine_handle<>) const noexcept;
+    void await_resume() const noexcept;
+  };
+  struct promise_type {
+    void get_return_object() noexcept;
+    suspend_never initial_suspend() const noexcept;
+    suspend_never final_suspend() const noexcept;
+    void return_void() noexcept;
+    void unhandled_exception() noexcept;
+  };
+};
+
+}
+#endif
+
+// A unique kernel name type is required for each declared kernel entry point.
+template<int, int = 0> struct KN;
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Valid declarations.
+////////////////////////////////////////////////////////////////////////////////
+
+// Function declaration with GNU attribute spelling
+__attribute__((sycl_kernel_entry_point(KN<1>)))
+void ok1();
+
+// Function declaration with Clang attribute spelling.
+[[clang::sycl_kernel_entry_point(KN<2>)]]
+void ok2();
+
+// Function definition.
+[[clang::sycl_kernel_entry_point(KN<3>)]]
+void ok3() {}
+
+// Function template definition.
+template<typename KNT, typename T>
+[[clang::sycl_kernel_entry_point(KNT)]]
+void ok4(T) {}
+
+// Function template explicit specialization.
+template<>
+[[clang::sycl_kernel_entry_point(KN<4,1>)]]
+void ok4<KN<4,1>>(int) {}
+
+// Function template explicit instantiation.
+template void ok4<KN<4,2>, long>(long);
+
+namespace NS {
+// Function declaration at namespace scope.
+[[clang::sycl_kernel_entry_point(KN<5>)]]
+void ok5();
+}
+
+struct S6 {
+  // Static member function declaration.
+  [[clang::sycl_kernel_entry_point(KN<6>)]]
+  static void ok6();
+};
+
+// Dependent hidden friend definition.
+template<typename KNT>
+struct S7 {
+  [[clang::sycl_kernel_entry_point(KNT)]]
+  friend void ok7(S7) {}
+};
+void test_ok7() {
+  ok7(S7<KN<7>>{});
+}
+
+// Non-dependent hidden friend definition.
+struct S8Base {};
+template<typename>
+struct S8 : S8Base {
+  [[clang::sycl_kernel_entry_point(KN<8>)]]
+  friend void ok8(const S8Base&) {}
+};
+void test_ok8() {
+  ok8(S8<int>{});
+}
+
+// The sycl_kernel_entry_point attribute must match across declarations and
+// cannot be added for the first time after a definition.
+[[clang::sycl_kernel_entry_point(KN<9>)]]
+void ok9();
+[[clang::sycl_kernel_entry_point(KN<9>)]]
+void ok9();
+[[clang::sycl_kernel_entry_point(KN<10>)]]
+void ok10();
+void ok10() {}
+void ok11();
+[[clang::sycl_kernel_entry_point(KN<11>)]]
+void ok11() {}
+
+using VOID = void;
+[[clang::sycl_kernel_entry_point(KN<12>)]]
+VOID ok12();
+[[clang::sycl_kernel_entry_point(KN<13>)]]
+const void ok13();
+
+#if __cplusplus >= 202302L
+auto ok14 = [] [[clang::sycl_kernel_entry_point(KN<14>)]] static -> void {};
+#endif
+
+template<typename KNT, typename T>
+struct S15 {
+  // Don't diagnose a dependent return type as a non-void type.
+  [[clang::sycl_kernel_entry_point(KNT)]]
+  static T ok15();
+};
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Invalid declarations.
+////////////////////////////////////////////////////////////////////////////////
+
+// The sycl_kernel_entry_point attribute cannot appertain to main() because
+// main() has a non-void return type. However, if the requirement for a void
+// return type were to be relaxed or if an allowance was made for main() to
+// return void (as gcc allows in some modes and as has been proposed to WG21
+// on occassion), main() still can't function as a SYCL kernel entry point,
+// so this test ensures such attempted uses of the attribute are rejected.
+struct Smain;
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions with a 'void' return type}}
+[[clang::sycl_kernel_entry_point(Smain)]]
+int main();
+
+template<int> struct BADKN;
+
+struct B1 {
+  // Non-static data member declaration.
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+  [[clang::sycl_kernel_entry_point(BADKN<1>)]]
+  int bad1;
+};
+
+struct B2 {
+  // Static data member declaration.
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+  [[clang::sycl_kernel_entry_point(BADKN<2>)]]
+  static int bad2;
+};
+
+struct B3 {
+  // Non-static member function declaration.
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
+  [[clang::sycl_kernel_entry_point(BADKN<3>)]]
+  void bad3();
+};
+
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+namespace bad4 [[clang::sycl_kernel_entry_point(BADKN<4>)]] {}
+
+#if __cplusplus >= 202002L
+// expected-error@+2 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+template<typename>
+concept bad5 [[clang::sycl_kernel_entry_point(BADKN<5>)]] = true;
+#endif
+
+// Type alias declarations.
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+typedef void bad6 [[clang::sycl_kernel_entry_point(BADKN<6>)]] ();
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+using bad7 [[clang::sycl_kernel_entry_point(BADKN<7>)]] = void();
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+using bad8 [[clang::sycl_kernel_entry_point(BADKN<8>)]] = int;
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to types}}
+using bad9 = int [[clang::sycl_kernel_entry_point(BADKN<9>)]];
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to types}}
+using bad10 = int() [[clang::sycl_kernel_entry_point(BADKN<10>)]];
+
+// Variable declaration.
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+[[clang::sycl_kernel_entry_point(BADKN<11>)]]
+int bad11;
+
+// Class declaration.
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+struct [[clang::sycl_kernel_entry_point(BADKN<12>)]] bad12;
+
+// Enumeration declaration.
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+enum [[clang::sycl_kernel_entry_point(BADKN<13>)]] bad13 {};
+
+// Enumerator.
+// expected-error@+2 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+enum {
+  bad14 [[clang::sycl_kernel_entry_point(BADKN<14>)]]
+};
+
+// Attribute added after the definition.
+// expected-error@+3 {{'sycl_kernel_entry_point' attribute cannot be added to a function after the function is defined}}
+// expected-note@+1 {{previous definition is here}}
+void bad15() {}
+[[clang::sycl_kernel_entry_point(BADKN<15>)]]
+void bad15();
+
+// The function must return void.
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions with a 'void' return type}}
+[[clang::sycl_kernel_entry_point(BADKN<16>)]]
+int bad16();
+
+// Function parameters.
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+void bad17(void (fp [[clang::sycl_kernel_entry_point(BADKN<17>)]])());
+
+// Function template parameters.
+// FIXME: Clang currently ignores attributes that appear in template parameters
+// FIXME: and the C++ standard is unclear regarding whether such attributes are
+// FIXME: permitted. P3324 (Attributes for namespace aliases, template
+// FIXME: parameters, and lambda captures) seeks to clarify the situation.
+// FIXME-expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+template<void (fp [[clang::sycl_kernel_entry_point(BADKN<18>)]])()>
+void bad18();
+
+#if __cplusplus >= 202002L
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a coroutine}}
+[[clang::sycl_kernel_entry_point(BADKN<19>)]]
+void bad19() {
+  co_return;
+}
+#endif
+
+struct B20 {
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
+  [[clang::sycl_kernel_entry_point(BADKN<20>)]]
+  B20();
+};
+
+struct B21 {
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
+  [[clang::sycl_kernel_entry_point(BADKN<21>)]]
+  ~B21();
+};
+
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a variadic function}}
+[[clang::sycl_kernel_entry_point(BADKN<22>)]]
+void bad22(...);
+
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a deleted function}}
+[[clang::sycl_kernel_entry_point(BADKN<23>)]]
+void bad23() = delete;
+
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a constexpr function}}
+[[clang::sycl_kernel_entry_point(BADKN<24>)]]
+constexpr void bad24() {}
+
+#if __cplusplus >= 202002L
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a consteval function}}
+[[clang::sycl_kernel_entry_point(BADKN<25>)]]
+consteval void bad25() {}
+#endif
+
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a function declared with the 'noreturn' attribute}}
+[[clang::sycl_kernel_entry_point(BADKN<26>)]]
+[[noreturn]] void bad26();
+
+// expected-error@+3 {{attribute 'target' multiversioning cannot be combined with attribute 'sycl_kernel_entry_point'}}
+__attribute__((target("avx"))) void bad27();
+[[clang::sycl_kernel_entry_point(BADKN<27>)]]
+__attribute__((target("sse4.2"))) void bad27();
+
+template<typename KNT>
+struct B28 {
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a deleted function}}
+  [[clang::sycl_kernel_entry_point(KNT)]]
+  friend void bad28() = delete;
+};
+
+#if __cplusplus >= 202002L
+template<typename KNT, typename T>
+struct B29 {
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a defaulted function}}
+  [[clang::sycl_kernel_entry_point(KNT)]]
+  friend T operator==(B29, B29) = default;
+};
+#endif
+
+#if __cplusplus >= 202002L
+template<typename KNT>
+struct B30 {
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a coroutine}}
+  [[clang::sycl_kernel_entry_point(KNT)]]
+  friend void bad30() { co_return; }
+};
+#endif
+
+template<typename KNT>
+struct B31 {
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a variadic function}}
+  [[clang::sycl_kernel_entry_point(KNT)]]
+  friend void bad31(...) {}
+};
+
+template<typename KNT>
+struct B32 {
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a constexpr function}}
+  [[clang::sycl_kernel_entry_point(KNT)]]
+  friend constexpr void bad32() {}
+};
+
+#if __cplusplus >= 202002L
+template<typename KNT>
+struct B33 {
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a consteval function}}
+  [[clang::sycl_kernel_entry_point(KNT)]]
+  friend consteval void bad33() {}
+};
+#endif
+
+template<typename KNT>
+struct B34 {
+  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a function declared with the 'noreturn' attribute}}
+  [[clang::sycl_kernel_entry_point(KNT)]]
+  [[noreturn]] friend void bad34() {}
+};
+
+#if __cplusplus >= 202302L
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
+auto bad35 = [] [[clang::sycl_kernel_entry_point(BADKN<35>)]] -> void {};
+#endif
+
+#if __cplusplus >= 202302L
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions with a non-deduced 'void' return type}}
+auto bad36 = [] [[clang::sycl_kernel_entry_point(BADKN<36>)]] static {};
+#endif
+
+#if __cplusplus >= 202302L
+// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a coroutine}}
+auto bad37 = [] [[clang::sycl_kernel_entry_point(BADKN<37>)]] static -> void { co_return; };
+#endif
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp
index c63d241163e61..14b5d2746631d 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp
@@ -120,18 +120,3 @@ template<typename> concept C = true;
 // expected-error@+1 {{expected a type}}
 [[clang::sycl_kernel_entry_point(C<int>)]] void bad11();
 #endif
-
-struct B12; // #B12-decl
-// FIXME: C++23 [temp.expl.spec]p12 states:
-// FIXME:   ... Similarly, attributes appearing in the declaration of a template
-// FIXME:   have no effect on an explicit specialization of that template.
-// FIXME: Clang currently instantiates and propagates attributes from a function
-// FIXME: template to its explicit specializations resulting in the following
-// FIXME: spurious error.
-// expected-error@+4 {{incomplete type 'B12' named in nested name specifier}}
-// expected-note@+5 {{in instantiation of function template specialization 'bad12<B12>' requested here}}
-// expected-note@#B12-decl {{forward declaration of 'B12'}}
-template<typename T>
-[[clang::sycl_kernel_entry_point(typename T::not_found)]] void bad12() {}
-template<>
-void bad12<B12>() {}
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp
new file mode 100644
index 0000000000000..83c3e5ca267ab
--- /dev/null
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp
@@ -0,0 +1,104 @@
+// Test that SYCL kernel name conflicts that occur across module boundaries are
+// properly diagnosed and that declarations are properly merged so that spurious
+// conflicts are not reported.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t \
+// RUN:   -std=c++17 -fsycl-is-host %t/test.cpp -verify
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t \
+// RUN:   -std=c++17 -fsycl-is-device %t/test.cpp -verify
+
+#--- module.modulemap
+module M1 { header "m1.h" }
+module M2 { header "m2.h" }
+
+
+#--- common.h
+template<int> struct KN;
+
+[[clang::sycl_kernel_entry_point(KN<1>)]]
+void common_test1() {}
+
+template<typename T>
+[[clang::sycl_kernel_entry_point(T)]]
+void common_test2() {}
+template void common_test2<KN<2>>();
+
+
+#--- m1.h
+#include "common.h"
+
+[[clang::sycl_kernel_entry_point(KN<3>)]]
+void m1_test3() {} // << expected previous declaration note here.
+
+template<typename T>
+[[clang::sycl_kernel_entry_point(T)]]
+void m1_test4() {} // << expected previous declaration note here.
+template void m1_test4<KN<4>>();
+
+[[clang::sycl_kernel_entry_point(KN<5>)]]
+void m1_test5() {} // << expected previous declaration note here.
+
+template<typename T>
+[[clang::sycl_kernel_entry_point(T)]]
+void m1_test6() {} // << expected previous declaration note here.
+template void m1_test6<KN<6>>();
+
+
+#--- m2.h
+#include "common.h"
+
+[[clang::sycl_kernel_entry_point(KN<3>)]]
+void m2_test3() {} // << expected kernel name conflict here.
+
+template<typename T>
+[[clang::sycl_kernel_entry_point(T)]]
+void m2_test4() {} // << expected kernel name conflict here.
+template void m2_test4<KN<4>>();
+
+[[clang::sycl_kernel_entry_point(KN<7>)]]
+void m2_test7() {} // << expected previous declaration note here.
+
+template<typename T>
+[[clang::sycl_kernel_entry_point(T)]]
+void m2_test8() {} // << expected previous declaration note here.
+template void m2_test8<KN<8>>();
+
+
+#--- test.cpp
+#include "m1.h"
+#include "m2.h"
+
+// Expected diagnostics for m1_test3() and m2_test3():
+// expected-error@m2.h:4 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-note@m1.h:12 {{previous declaration is here}}
+
+// Expected diagnostics for m1_test4<KN<4>>() and m2_test4<KN<4>>():
+// expected-error@m2.h:8 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-note@m1.h:16 {{previous declaration is here}}
+
+// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-note@m1.h:4 {{previous declaration is here}}
+[[clang::sycl_kernel_entry_point(KN<5>)]]
+void test5() {}
+
+// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-note@m1.h:8 {{previous declaration is here}}
+[[clang::sycl_kernel_entry_point(KN<6>)]]
+void test6() {}
+
+// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-note@m2.h:12 {{previous declaration is here}}
+[[clang::sycl_kernel_entry_point(KN<7>)]]
+void test7() {}
+
+// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-note@m2.h:16 {{previous declaration is here}}
+[[clang::sycl_kernel_entry_point(KN<8>)]]
+void test8() {}
+
+void f() {
+  common_test1();
+  common_test2<KN<2>>();
+}
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp
new file mode 100644
index 0000000000000..0814d898d1c0e
--- /dev/null
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp
@@ -0,0 +1,36 @@
+// Test that SYCL kernel name conflicts that occur across PCH boundaries are
+// properly diagnosed.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -std=c++17 -fsycl-is-host -emit-pch -x c++-header \
+// RUN:   %t/pch.h -o %t/pch.h.host.pch
+// RUN: %clang_cc1 -std=c++17 -fsycl-is-host -verify \
+// RUN:   -include-pch %t/pch.h.host.pch %t/test.cpp
+// RUN: %clang_cc1 -std=c++17 -fsycl-is-device -emit-pch -x c++-header \
+// RUN:   %t/pch.h -o %t/pch.h.device.pch
+// RUN: %clang_cc1 -std=c++17 -fsycl-is-device -verify \
+// RUN:   -include-pch %t/pch.h.device.pch %t/test.cpp
+
+#--- pch.h
+template<int> struct KN;
+
+[[clang::sycl_kernel_entry_point(KN<1>)]]
+void pch_test1() {} // << expected previous declaration note here.
+
+template<typename T>
+[[clang::sycl_kernel_entry_point(T)]]
+void pch_test2() {} // << expected previous declaration note here.
+template void pch_test2<KN<2>>();
+
+
+#--- test.cpp
+// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-note@pch.h:4 {{previous declaration is here}}
+[[clang::sycl_kernel_entry_point(KN<1>)]]
+void test1() {}
+
+// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-note@pch.h:8 {{previous declaration is here}}
+[[clang::sycl_kernel_entry_point(KN<2>)]]
+void test2() {}
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp
new file mode 100644
index 0000000000000..78dd89696c02d
--- /dev/null
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp
@@ -0,0 +1,118 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s
+
+// These tests validate that the kernel name type argument provided to the
+// sycl_kernel_entry_point attribute meets the requirements of a SYCL kernel
+// name as described in section 5.2, "Naming of kernels", of the SYCL 2020
+// specification.
+
+struct S1;
+// expected-warning@+3 {{redundant 'sycl_kernel_entry_point' attribute}}
+// expected-note@+1  {{previous attribute is here}}
+[[clang::sycl_kernel_entry_point(S1),
+  clang::sycl_kernel_entry_point(S1)]]
+void ok1();
+
+// expected-error@+1 {{'int' is not a valid SYCL kernel name type; a non-union class type is required}}
+[[clang::sycl_kernel_entry_point(int)]] void bad2();
+
+// expected-error@+1 {{'int ()' is not a valid SYCL kernel name type; a non-union class type is required}}
+[[clang::sycl_kernel_entry_point(int())]] void bad3();
+
+// expected-error@+1 {{'int (*)()' is not a valid SYCL kernel name type; a non-union class type is required}}
+[[clang::sycl_kernel_entry_point(int(*)())]] void bad4();
+
+// expected-error@+1 {{'int (&)()' is not a valid SYCL kernel name type; a non-union class type is required}}
+[[clang::sycl_kernel_entry_point(int(&)())]] void bad5();
+
+// expected-error@+1 {{'decltype(nullptr)' (aka 'std::nullptr_t') is not a valid SYCL kernel name type; a non-union class type is required}}
+[[clang::sycl_kernel_entry_point(decltype(nullptr))]] void bad6();
+
+union U7; // #U7-decl
+// expected-error@+2 {{'U7' is not a valid SYCL kernel name type; a non-union class type is required}}
+// expected-note@#U7-decl {{'U7' declared here}}
+[[clang::sycl_kernel_entry_point(U7)]] void bad7();
+
+enum E8 {}; // #E8-decl
+// expected-error@+2 {{'E8' is not a valid SYCL kernel name type; a non-union class type is required}}
+// expected-note@#E8-decl {{'E8' declared here}}
+[[clang::sycl_kernel_entry_point(E8)]] void bad8();
+
+enum E9 : int; // #E9-decl
+// expected-error@+2 {{'E9' is not a valid SYCL kernel name type; a non-union class type is required}}
+// expected-note@#E9-decl {{'E9' declared here}}
+[[clang::sycl_kernel_entry_point(E9)]] void bad9();
+
+struct B10 {
+  struct MS;
+};
+// FIXME-expected-error@+1 {{'sycl_kernel_entry_point' attribute argument must be a forward declarable class type}}
+[[clang::sycl_kernel_entry_point(B10::MS)]] void bad10();
+
+struct B11 {
+  struct MS;
+};
+// FIXME-expected-error@+3 {{'sycl_kernel_entry_point' attribute argument must be a forward declarable class type}}
+template<typename T>
+[[clang::sycl_kernel_entry_point(typename T::MS)]] void bad11() {}
+template void bad11<B11>();
+
+template<typename T>
+[[clang::sycl_kernel_entry_point(T)]] void bad12();
+void f12() {
+  // FIXME-expected-error@+2 {{'sycl_kernel_entry_point' attribute argument must be a forward declarable class type}}
+  struct LS;
+  bad12<LS>();
+}
+
+struct B13_1;
+struct B13_2;
+// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B13_2' vs 'B13_1'}}
+// expected-note@+1  {{'bad13' declared here}}
+[[clang::sycl_kernel_entry_point(B13_1)]] void bad13();
+[[clang::sycl_kernel_entry_point(B13_2)]] void bad13() {}
+
+struct B14_1;
+struct B14_2;
+// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B14_2' vs 'B14_1'}}
+// expected-note@+1  {{previous attribute is here}}
+[[clang::sycl_kernel_entry_point(B14_1),
+  clang::sycl_kernel_entry_point(B14_2)]]
+void bad14();
+
+struct B15;
+// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-note@+1  {{previous declaration is here}}
+[[clang::sycl_kernel_entry_point(B15)]] void bad15_1();
+[[clang::sycl_kernel_entry_point(B15)]] void bad15_2();
+
+struct B16_1;
+struct B16_2;
+// expected-error@+4 {{'sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B16_2' vs 'B16_1'}}
+// expected-note@+1  {{'bad16' declared here}}
+[[clang::sycl_kernel_entry_point(B16_1)]] void bad16();
+void bad16(); // The attribute from the previous declaration is inherited.
+[[clang::sycl_kernel_entry_point(B16_2)]] void bad16();
+
+template<int>
+struct B17 {
+  // expected-error@+1 {{'int' is not a valid SYCL kernel name type; a non-union class type is required}}
+  [[clang::sycl_kernel_entry_point(int)]]
+  static void bad17();
+};
+
+template<int>
+struct B18 {
+  // expected-error@+1 {{'int' is not a valid SYCL kernel name type; a non-union class type is required}}
+  [[clang::sycl_kernel_entry_point(int)]]
+  friend void bad18() {}
+};
+
+template<typename KNT>
+struct B19 {
+  // expected-error@+1 {{'int' is not a valid SYCL kernel name type; a non-union class type is required}}
+  [[clang::sycl_kernel_entry_point(KNT)]]
+  friend void bad19() {}
+};
+// expected-note@+1 {{in instantiation of template class 'B19<int>' requested here}}
+B19<int> b19;
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp
new file mode 100644
index 0000000000000..4c61570419629
--- /dev/null
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-sfinae.cpp
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s
+
+// These tests are intended to validate that a sycl_kernel_entry_point attribute
+// appearing in the declaration of a function template does not affect overload
+// resolution or cause spurious errors during overload resolution due to either
+// a substitution failure in the attribute argument or a semantic check of the
+// attribute during instantiation of a specialization unless that specialization
+// is selected by overload resolution.
+
+// FIXME: C++23 [temp.expl.spec]p12 states:
+// FIXME:   ... Similarly, attributes appearing in the declaration of a template
+// FIXME:   have no effect on an explicit specialization of that template.
+// FIXME: Clang currently instantiates and propagates attributes from a function
+// FIXME: template to its explicit specializations resulting in the following
+// FIXME: spurious error.
+struct S1; // #S1-decl
+// expected-error@+4 {{incomplete type 'S1' named in nested name specifier}}
+// expected-note@+5 {{in instantiation of function template specialization 'ok1<S1>' requested here}}
+// expected-note@#S1-decl {{forward declaration of 'S1'}}
+template<typename T>
+[[clang::sycl_kernel_entry_point(typename T::invalid)]] void ok1() {}
+template<>
+void ok1<S1>() {}
+void test_ok1() {
+  // ok1<S1>() is not a call to a SYCL kernel entry point function.
+  ok1<S1>();
+}
+
+// FIXME: The sycl_kernel_entry_point attribute should not be instantiated
+// FIXME: until after overload resolution has completed.
+struct S2; // #S2-decl
+// expected-error@+6 {{incomplete type 'S2' named in nested name specifier}}
+// expected-note@+10 {{in instantiation of function template specialization 'ok2<S2>' requested here}}
+// expected-note@#S2-decl {{forward declaration of 'S2'}}
+template<typename T>
+[[clang::sycl_kernel_entry_point(T)]] void ok2(int) {}
+template<typename T>
+[[clang::sycl_kernel_entry_point(typename T::invalid)]] void ok2(long) {}
+void test_ok2() {
+  // ok2(int) is a better match and is therefore selected by overload
+  // resolution; the attempted instantiation of ok2(long) should not produce
+  // an error for the substitution failure into the attribute argument.
+  ok2<S2>(2);
+}
+
+// FIXME: The sycl_kernel_entry_point attribute should not be instantiated
+// FIXME: until after overload resolution has completed.
+struct S3;
+struct Select3 {
+  using bad_type = int;
+  using good_type = S3;
+};
+// expected-error@+5 {{'typename Select3::bad_type' (aka 'int') is not a valid SYCL kernel name type; a non-union class type is required}}
+// expected-note@+9 {{in instantiation of function template specialization 'ok3<Select3>' requested here}}
+template<typename T>
+[[clang::sycl_kernel_entry_point(typename T::good_type)]] void ok3(int) {}
+template<typename T>
+[[clang::sycl_kernel_entry_point(typename T::bad_type)]] void ok3(long) {}
+void test_ok3() {
+  // ok3(int) is a better match and is therefore selected by overload
+  // resolution; the attempted instantiation of ok3(long) should not produce
+  // an error for the invalid kernel name provided as the attribute argument.
+  ok3<Select3>(2);
+}

From 0acdba83ae5098c5c8f09f53aba4df37d97f3cf0 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Thu, 9 Jan 2025 14:46:34 -0600
Subject: [PATCH 57/79] [libc] Remove leftover 'gpu/' source directory
 (#122368)

Summary:
This isn't used anymore, I moved the GPU extensions into `offload/`.
---
 libc/src/CMakeLists.txt        |  4 ----
 libc/src/gpu/CMakeLists.txt    | 10 ----------
 libc/src/gpu/rpc_host_call.cpp | 35 ----------------------------------
 libc/src/gpu/rpc_host_call.h   | 21 --------------------
 4 files changed, 70 deletions(-)
 delete mode 100644 libc/src/gpu/CMakeLists.txt
 delete mode 100644 libc/src/gpu/rpc_host_call.cpp
 delete mode 100644 libc/src/gpu/rpc_host_call.h

diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
index 9fc331ad18a39..32308ba147940 100644
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -26,10 +26,6 @@ if(${LIBC_TARGET_OS} STREQUAL "linux")
   add_subdirectory(unistd)
 endif()
 
-if(${LIBC_TARGET_OS} STREQUAL "gpu")
-  add_subdirectory(gpu)
-endif()
-
 if(NOT LLVM_LIBC_FULL_BUILD)
   return()
 endif()
diff --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt
deleted file mode 100644
index e20228516b511..0000000000000
--- a/libc/src/gpu/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-add_entrypoint_object(
-  rpc_host_call
-  SRCS
-    rpc_host_call.cpp
-  HDRS
-    rpc_host_call.h
-  DEPENDS
-    libc.src.__support.RPC.rpc_client
-    libc.src.__support.GPU.utils
-)
diff --git a/libc/src/gpu/rpc_host_call.cpp b/libc/src/gpu/rpc_host_call.cpp
deleted file mode 100644
index 676031d16e154..0000000000000
--- a/libc/src/gpu/rpc_host_call.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===---------- GPU implementation of the external RPC call function ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/gpu/rpc_host_call.h"
-
-#include "src/__support/GPU/utils.h"
-#include "src/__support/RPC/rpc_client.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-
-namespace LIBC_NAMESPACE_DECL {
-
-// This calls the associated function pointer on the RPC server with the given
-// arguments. We expect that the pointer here is a valid pointer on the server.
-LLVM_LIBC_FUNCTION(unsigned long long, rpc_host_call,
-                   (void *fn, void *data, size_t size)) {
-  rpc::Client::Port port = rpc::client.open<LIBC_HOST_CALL>();
-  port.send_n(data, size);
-  port.send([=](rpc::Buffer *buffer, uint32_t) {
-    buffer->data[0] = reinterpret_cast<uintptr_t>(fn);
-  });
-  unsigned long long ret;
-  port.recv([&](rpc::Buffer *buffer, uint32_t) {
-    ret = static_cast<unsigned long long>(buffer->data[0]);
-  });
-  port.close();
-  return ret;
-}
-
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/gpu/rpc_host_call.h b/libc/src/gpu/rpc_host_call.h
deleted file mode 100644
index 861149dead561..0000000000000
--- a/libc/src/gpu/rpc_host_call.h
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- Implementation header for RPC functions -----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
-#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
-
-#include "src/__support/macros/config.h"
-#include <stddef.h> // size_t
-
-namespace LIBC_NAMESPACE_DECL {
-
-unsigned long long rpc_host_call(void *fn, void *buffer, size_t size);
-
-} // namespace LIBC_NAMESPACE_DECL
-
-#endif // LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H

From 84087226fa38b212325c651f1bc3caa79491bc80 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek@codeweavers.com>
Date: Thu, 9 Jan 2025 21:48:16 +0100
Subject: [PATCH 58/79] [LLD][COFF] Emit base relocation for native CHPE
 metadata pointer on ARM64X (#121500)

---
 lld/COFF/Chunks.cpp               | 16 ++++++++++++++++
 lld/test/COFF/arm64x-loadconfig.s |  6 +++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index ec0fdf0b67b38..d87a4d8d1ae79 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -564,6 +564,22 @@ void SectionChunk::getBaserels(std::vector<Baserel> *res) {
       continue;
     res->emplace_back(rva + rel.VirtualAddress, ty);
   }
+
+  // Insert a 64-bit relocation for CHPEMetadataPointer in the native load
+  // config of a hybrid ARM64X image. Its value will be set in prepareLoadConfig
+  // to match the value in the EC load config, which is expected to be
+  // a relocatable pointer to the __chpe_metadata symbol.
+  COFFLinkerContext &ctx = file->symtab.ctx;
+  if (ctx.hybridSymtab && ctx.symtab.loadConfigSym &&
+      ctx.symtab.loadConfigSym->getChunk() == this &&
+      ctx.hybridSymtab->loadConfigSym &&
+      ctx.symtab.loadConfigSize >=
+          offsetof(coff_load_configuration64, CHPEMetadataPointer) +
+              sizeof(coff_load_configuration64::CHPEMetadataPointer))
+    res->emplace_back(
+        ctx.symtab.loadConfigSym->getRVA() +
+            offsetof(coff_load_configuration64, CHPEMetadataPointer),
+        IMAGE_REL_BASED_DIR64);
 }
 
 // MinGW specific.
diff --git a/lld/test/COFF/arm64x-loadconfig.s b/lld/test/COFF/arm64x-loadconfig.s
index d21f4bfe95b84..3b53d32a9b549 100644
--- a/lld/test/COFF/arm64x-loadconfig.s
+++ b/lld/test/COFF/arm64x-loadconfig.s
@@ -118,10 +118,14 @@
 // BASERELOC:      BaseReloc [
 // BASERELOC-NEXT:   Entry {
 // BASERELOC-NEXT:     Type: DIR64
+// BASERELOC-NEXT:     Address: 0x10C8
+// BASERELOC-NEXT:   }
+// BASERELOC-NEXT:   Entry {
+// BASERELOC-NEXT:     Type: DIR64
 // BASERELOC-NEXT:     Address: 0x1208
 // BASERELOC-NEXT:   }
 // BASERELOC-NEXT:   Entry {
-// BASERELOC:          Type: DIR64
+// BASERELOC-NEXT:     Type: DIR64
 // BASERELOC-NEXT:     Address: 0x2074
 // BASERELOC-NEXT:   }
 

From e8c8543a1c728278bf323d34e451bcf9042d9257 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 9 Jan 2025 20:55:36 +0000
Subject: [PATCH 59/79] [TySan] Intercept malloc_size on Apple platforms.
 (#122133)

After https://github.com/llvm/llvm-project/pull/120563 malloc_size also
needs intercepting on Apple platforms, otherwise all type-sanitized
binaries crash on startup with an objc error:
realized class 0x12345 has corrupt data pointer: malloc_size(0x567) = 0

PR: https://github.com/llvm/llvm-project/pull/122133
---
 .../sanitizer_common/sanitizer_allocator_dlsym.h   | 14 ++++++++------
 compiler-rt/lib/tysan/tysan_interceptors.cpp       |  8 ++++++++
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h
index b360478a058a5..6e6cdbd9eeaed 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h
@@ -36,21 +36,19 @@ struct DlSymAllocator {
   static void *Allocate(uptr size_in_bytes, uptr align = kWordSize) {
     void *ptr = InternalAlloc(size_in_bytes, nullptr, align);
     CHECK(internal_allocator()->FromPrimary(ptr));
-    Details::OnAllocate(ptr,
-                        internal_allocator()->GetActuallyAllocatedSize(ptr));
+    Details::OnAllocate(ptr, GetSize(ptr));
     return ptr;
   }
 
   static void *Callocate(usize nmemb, usize size) {
     void *ptr = InternalCalloc(nmemb, size);
     CHECK(internal_allocator()->FromPrimary(ptr));
-    Details::OnAllocate(ptr,
-                        internal_allocator()->GetActuallyAllocatedSize(ptr));
+    Details::OnAllocate(ptr, GetSize(ptr));
     return ptr;
   }
 
   static void Free(void *ptr) {
-    uptr size = internal_allocator()->GetActuallyAllocatedSize(ptr);
+    uptr size = GetSize(ptr);
     Details::OnFree(ptr, size);
     InternalFree(ptr);
   }
@@ -63,7 +61,7 @@ struct DlSymAllocator {
       Free(ptr);
       return nullptr;
     }
-    uptr size = internal_allocator()->GetActuallyAllocatedSize(ptr);
+    uptr size = GetSize(ptr);
     uptr memcpy_size = Min(new_size, size);
     void *new_ptr = Allocate(new_size);
     if (new_ptr)
@@ -77,6 +75,10 @@ struct DlSymAllocator {
     return Realloc(ptr, count * size);
   }
 
+  static uptr GetSize(void *ptr) {
+    return internal_allocator()->GetActuallyAllocatedSize(ptr);
+  }
+
   static void OnAllocate(const void *ptr, uptr size) {}
   static void OnFree(const void *ptr, uptr size) {}
 };
diff --git a/compiler-rt/lib/tysan/tysan_interceptors.cpp b/compiler-rt/lib/tysan/tysan_interceptors.cpp
index 08b1010a48ecf..a9c55a3ae0cf0 100644
--- a/compiler-rt/lib/tysan/tysan_interceptors.cpp
+++ b/compiler-rt/lib/tysan/tysan_interceptors.cpp
@@ -108,6 +108,14 @@ INTERCEPTOR(void *, malloc, uptr size) {
   return res;
 }
 
+#if SANITIZER_APPLE
+INTERCEPTOR(uptr, malloc_size, void *ptr) {
+  if (DlsymAlloc::PointerIsMine(ptr))
+    return DlsymAlloc::GetSize(ptr);
+  return REAL(malloc_size)(ptr);
+}
+#endif
+
 INTERCEPTOR(void *, realloc, void *ptr, uptr size) {
   if (DlsymAlloc::Use() || DlsymAlloc::PointerIsMine(ptr))
     return DlsymAlloc::Realloc(ptr, size);

From d01ae567742c4d83b67483e15eb74c0ecd2e8270 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu@sifive.com>
Date: Thu, 9 Jan 2025 12:59:57 -0800
Subject: [PATCH 60/79] =?UTF-8?q?Revert=20"[Exegesis]=20Add=20the=20abilit?=
 =?UTF-8?q?y=20to=20dry-run=20the=20measurement=20phase=20(=E2=80=A6=20(#1?=
 =?UTF-8?q?22371)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…#121991)"

This reverts commit f8f8598fd886cddfd374fa43eb6d7d37d301b576.

This breaks ARMv7 and s390x buildbot with the following message:
```
llvm-exegesis error: No available targets are compatible with triple "armv8l-unknown-linux-gnueabihf"
FileCheck error: '<stdin>' is empty.
FileCheck command line:  /home/tcwg-buildbot/worker/clang-armv7-2stage/stage2/bin/FileCheck /home/tcwg-buildbot/worker/clang-armv7-2stage/llvm/llvm/test/tools/llvm-exegesis/dry-run-measurement.test
```
---
 llvm/docs/CommandGuide/llvm-exegesis.rst      |  1 -
 .../llvm-exegesis/dry-run-measurement.test    | 11 -------
 .../tools/llvm-exegesis/lib/BenchmarkResult.h |  1 -
 .../llvm-exegesis/lib/BenchmarkRunner.cpp     | 33 +++++--------------
 llvm/tools/llvm-exegesis/lib/Target.cpp       |  4 +--
 llvm/tools/llvm-exegesis/llvm-exegesis.cpp    |  9 ++---
 6 files changed, 13 insertions(+), 46 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-exegesis/dry-run-measurement.test

diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst
index d357c2ceea418..8266d891a5e6b 100644
--- a/llvm/docs/CommandGuide/llvm-exegesis.rst
+++ b/llvm/docs/CommandGuide/llvm-exegesis.rst
@@ -301,7 +301,6 @@ OPTIONS
   * ``prepare-and-assemble-snippet``: Same as ``prepare-snippet``, but also dumps an excerpt of the sequence (hex encoded).
   * ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``. but also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``.
   * ``measure``: Same as ``assemble-measured-code``, but also runs the measurement.
-  * ``dry-run-measurement``: Same as measure, but does not actually execute the snippet.
 
 .. option:: --x86-lbr-sample-period=<nBranches/sample>
 
diff --git a/llvm/test/tools/llvm-exegesis/dry-run-measurement.test b/llvm/test/tools/llvm-exegesis/dry-run-measurement.test
deleted file mode 100644
index e4449d7df3d82..0000000000000
--- a/llvm/test/tools/llvm-exegesis/dry-run-measurement.test
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-exegesis --mtriple=riscv64 --mcpu=sifive-p470 --mode=latency --opcode-name=ADD --use-dummy-perf-counters --benchmark-phase=dry-run-measurement | FileCheck %s
-# REQUIRES: riscv-registered-target
-
-# This test makes sure that llvm-exegesis doesn't execute "cross-compiled" snippets in the presence of
-# --dry-run-measurement. RISC-V was chosen simply because most of the time we run tests on X86 machines.
-
-# Should not contain misleading results.
-# CHECK: measurements:    []
-
-# Should not contain error messages like "snippet crashed while running: Segmentation fault".
-# CHECK: error:           ''
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
index 5480d85616878..3c09a8380146e 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
@@ -38,7 +38,6 @@ enum class BenchmarkPhaseSelectorE {
   PrepareAndAssembleSnippet,
   AssembleMeasuredCode,
   Measure,
-  DryRunMeasure,
 };
 
 enum class BenchmarkFilter { All, RegOnly, WithMem };
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index cc46f7feb6cf7..a7771b99e97b1 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -99,7 +99,7 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
   static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
   create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
          BenchmarkRunner::ScratchSpace *Scratch,
-         std::optional<int> BenchmarkProcessCPU, bool DryRun) {
+         std::optional<int> BenchmarkProcessCPU) {
     Expected<ExecutableFunction> EF =
         ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
 
@@ -107,17 +107,14 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
       return EF.takeError();
 
     return std::unique_ptr<InProcessFunctionExecutorImpl>(
-        new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch,
-                                          DryRun));
+        new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
   }
 
 private:
   InProcessFunctionExecutorImpl(const LLVMState &State,
                                 ExecutableFunction Function,
-                                BenchmarkRunner::ScratchSpace *Scratch,
-                                bool DryRun)
-      : State(State), Function(std::move(Function)), Scratch(Scratch),
-        DryRun(DryRun) {}
+                                BenchmarkRunner::ScratchSpace *Scratch)
+      : State(State), Function(std::move(Function)), Scratch(Scratch) {}
 
   static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues,
                                       SmallVector<int64_t, 4> *Result) {
@@ -146,14 +143,9 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
       CrashRecoveryContext CRC;
       CrashRecoveryContext::Enable();
       const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() {
-        if (DryRun) {
-          Counter->start();
-          Counter->stop();
-        } else {
-          Counter->start();
-          this->Function(ScratchPtr);
-          Counter->stop();
-        }
+        Counter->start();
+        this->Function(ScratchPtr);
+        Counter->stop();
       });
       CrashRecoveryContext::Disable();
       PS.reset();
@@ -185,7 +177,6 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
   const LLVMState &State;
   const ExecutableFunction Function;
   BenchmarkRunner::ScratchSpace *const Scratch;
-  bool DryRun = false;
 };
 
 #ifdef __linux__
@@ -673,9 +664,6 @@ Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
 BenchmarkRunner::createFunctionExecutor(
     object::OwningBinary<object::ObjectFile> ObjectFile,
     const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const {
-  bool DryRun =
-      BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::DryRunMeasure;
-
   switch (ExecutionMode) {
   case ExecutionModeE::InProcess: {
     if (BenchmarkProcessCPU.has_value())
@@ -683,8 +671,7 @@ BenchmarkRunner::createFunctionExecutor(
                                  "support benchmark core pinning.");
 
     auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
-        State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU,
-        DryRun);
+        State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU);
     if (!InProcessExecutorOrErr)
       return InProcessExecutorOrErr.takeError();
 
@@ -692,10 +679,6 @@ BenchmarkRunner::createFunctionExecutor(
   }
   case ExecutionModeE::SubProcess: {
 #ifdef __linux__
-    if (DryRun)
-      return make_error<Failure>("The subprocess execution mode cannot "
-                                 "dry-run measurement at this moment.");
-
     auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
         State, std::move(ObjectFile), Key, BenchmarkProcessCPU);
     if (!SubProcessExecutorOrErr)
diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp
index e2251ff978888..29e58692f0e92 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Target.cpp
@@ -98,7 +98,7 @@ ExegesisTarget::createBenchmarkRunner(
     return nullptr;
   case Benchmark::Latency:
   case Benchmark::InverseThroughput:
-    if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure &&
+    if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
         !PfmCounters.CycleCounter) {
       const char *ModeName = Mode == Benchmark::Latency
                                  ? "latency"
@@ -116,7 +116,7 @@ ExegesisTarget::createBenchmarkRunner(
         State, Mode, BenchmarkPhaseSelector, ResultAggMode, ExecutionMode,
         ValidationCounters, BenchmarkRepeatCount);
   case Benchmark::Uops:
-    if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure &&
+    if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
         !PfmCounters.UopsCounter && !PfmCounters.IssueCounters)
       return make_error<Failure>(
           "can't run 'uops' mode, sched model does not define uops or issue "
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 07bd44ee64f1f..fa37e05956be8 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -132,10 +132,7 @@ static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
         clEnumValN(
             BenchmarkPhaseSelectorE::Measure, "measure",
             "Same as prepare-measured-code, but also runs the measurement "
-            "(default)"),
-        clEnumValN(
-            BenchmarkPhaseSelectorE::DryRunMeasure, "dry-run-measurement",
-            "Same as measure, but does not actually execute the snippet")),
+            "(default)")),
     cl::init(BenchmarkPhaseSelectorE::Measure));
 
 static cl::opt<bool>
@@ -479,7 +476,7 @@ static void runBenchmarkConfigurations(
 }
 
 void benchmarkMain() {
-  if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure &&
+  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
       !UseDummyPerfCounters) {
 #ifndef HAVE_LIBPFM
     ExitWithError(
@@ -504,7 +501,7 @@ void benchmarkMain() {
 
   // Preliminary check to ensure features needed for requested
   // benchmark mode are present on target CPU and/or OS.
-  if (BenchmarkPhaseSelector >= BenchmarkPhaseSelectorE::Measure)
+  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
     ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
 
   if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess &&

From f764e71b7017e7264fffc410f5e10ef959e49294 Mon Sep 17 00:00:00 2001
From: Victor Mustya <victor.mustya@intel.com>
Date: Thu, 9 Jan 2025 13:03:16 -0800
Subject: [PATCH 61/79] [Clang][TableGen] Add missing __bf16 type to the
 builtins parser (#120662)

The Clang tablegen built-in function prototype parser has the `__bf16`
type missing. This patch adds the missing type to the parser.
---
 clang/test/TableGen/target-builtins-prototype-parser.td | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/clang/test/TableGen/target-builtins-prototype-parser.td b/clang/test/TableGen/target-builtins-prototype-parser.td
index 555aebb3ccfb1..a753f906a674f 100644
--- a/clang/test/TableGen/target-builtins-prototype-parser.td
+++ b/clang/test/TableGen/target-builtins-prototype-parser.td
@@ -57,6 +57,12 @@ def : Builtin {
   let Spellings = ["__builtin_08"];
 }
 
+def : Builtin {
+// CHECK: BUILTIN(__builtin_09, "V2yy", "")
+  let Prototype = "_Vector<2, __bf16>(__bf16)";
+  let Spellings = ["__builtin_09"];
+}
+
 #ifdef ERROR_EXPECTED_LANES
 def : Builtin {
 // ERROR_EXPECTED_LANES: :[[# @LINE + 1]]:7: error: Expected number of lanes after '_ExtVector<'
@@ -112,4 +118,3 @@ def : Builtin {
   let Spellings = ["__builtin_test_use_clang_extended_vectors"];
 }
 #endif
-

From c492a228e9227e2e44671b0f345fee9de62517bd Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Thu, 9 Jan 2025 16:03:34 -0500
Subject: [PATCH 62/79] [libc++] Add missing _LIBCPP_NODEBUG on internal
 aliases

---
 libcxx/include/__algorithm/radix_sort.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h
index c39b26a8620f4..95f04a8bb31f6 100644
--- a/libcxx/include/__algorithm/radix_sort.h
+++ b/libcxx/include/__algorithm/radix_sort.h
@@ -88,10 +88,10 @@ __partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator
 
 template <class _Value, class _Map, class _Radix>
 struct __radix_sort_traits {
-  using __image_type = decay_t<typename __invoke_of<_Map, _Value>::type>;
+  using __image_type _LIBCPP_NODEBUG = decay_t<typename __invoke_of<_Map, _Value>::type>;
   static_assert(is_unsigned<__image_type>::value);
 
-  using __radix_type = decay_t<typename __invoke_of<_Radix, __image_type>::type>;
+  using __radix_type _LIBCPP_NODEBUG = decay_t<typename __invoke_of<_Radix, __image_type>::type>;
   static_assert(is_integral<__radix_type>::value);
 
   static constexpr auto __radix_value_range = numeric_limits<__radix_type>::max() + 1;
@@ -101,7 +101,7 @@ struct __radix_sort_traits {
 
 template <class _Value, class _Map>
 struct __counting_sort_traits {
-  using __image_type = decay_t<typename __invoke_of<_Map, _Value>::type>;
+  using __image_type _LIBCPP_NODEBUG = decay_t<typename __invoke_of<_Map, _Value>::type>;
   static_assert(is_unsigned<__image_type>::value);
 
   static constexpr const auto __value_range = numeric_limits<__image_type>::max() + 1;

From 328c3a843f886f3768e536a508e1e3723d834b3e Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Thu, 9 Jan 2025 16:10:40 -0500
Subject: [PATCH 63/79] [RISCV][VLOPT] Add vmerge to isSupportedInstr (#122340)

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    |  4 ++
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll   |  3 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-int.ll    |  2 -
 .../CodeGen/RISCV/rvv/narrow-shift-extend.ll  |  3 +-
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll  | 60 +++++++++++++++++++
 5 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 89684decde6c1..ab73b112e6dd8 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -944,6 +944,10 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VMADD_VX:
   case RISCV::VNMSUB_VV:
   case RISCV::VNMSUB_VX:
+  // Vector Integer Merge Instructions
+  case RISCV::VMERGE_VIM:
+  case RISCV::VMERGE_VVM:
+  case RISCV::VMERGE_VXM:
   // Vector Widening Integer Multiply-Add Instructions
   case RISCV::VWMACCU_VV:
   case RISCV::VWMACCU_VX:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 3bed5cd04f214..c628a0d620498 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -697,7 +697,7 @@ define void @buildvec_seq_v9i8(ptr %x) {
 ; CHECK-LABEL: buildvec_seq_v9i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 73
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 9, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v9, 3
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a1
@@ -706,7 +706,6 @@ define void @buildvec_seq_v9i8(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetivli zero, 9, e8, m1, ta, ma
 ; CHECK-NEXT:    vmerge.vim v8, v9, 2, v0
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 80e462c937690..392709fdb4cf7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1290,10 +1290,8 @@ define void @mulhs_v6i16(ptr %x) {
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    li a1, 22
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v9, -7
 ; CHECK-NEXT:    vmerge.vim v9, v9, 7, v0
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
index 3fbe635576c9b..53f8a2503354c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll
@@ -142,10 +142,9 @@ entry:
 define <vscale x 4 x i32> @test_vloxei7(ptr %ptr, <vscale x 4 x i1> %offset, i64 %vl) {
 ; CHECK-LABEL: test_vloxei7:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
 ; CHECK-NEXT:    vsll.vi v12, v8, 2
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vloxei64.v v8, (a0), v12
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 46fbba35c35a2..da7917cae8ba6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3535,3 +3535,63 @@ define <vscale x 4 x i1> @vmfgt_vv(<vscale x 4 x float> %a, <vscale x 4 x i1> %b
   %2 = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %b, iXLen %vl)
   ret <vscale x 4 x i1> %2
 }
+
+define <vscale x 4 x i32> @vmerge_vvm(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmerge_vvm:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmerge.vvm v8, v8, v10, v0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmerge_vvm:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmerge.vvm v8, v8, v10, v0
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmerge.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %c, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmerge_vxm(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i1> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmerge_vxm:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmerge.vxm v8, v8, a0, v0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmerge_vxm:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vmerge.vxm v8, v8, a0, v0
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmerge.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i1> %c, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmerge_vim(<vscale x 4 x i32> %a, <vscale x 4 x i1> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vmerge_vim:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmerge.vim v8, v8, 9, v0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vmerge_vim:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vmerge.vim v8, v8, 9, v0
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmerge.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 9, <vscale x 4 x i1> %c, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}

From 03eb786f75e36e9e203e0092ec3c6c589fd53c4f Mon Sep 17 00:00:00 2001
From: Tom Honermann <tom.honermann@intel.com>
Date: Thu, 9 Jan 2025 16:40:04 -0500
Subject: [PATCH 64/79] [SYCL] Correct misplaced sycl_kernel_entry_point
 attribute in a namespace declaration of a negative test. (#122375)

Commit 1a73654b3212b623ac21b9deb3aeaadc6906b7e4 added a missing
diagnostic for incorrect placement of an attribute in a namespace
declaration. This change corrects a SYCL test that inadvertently
exercised the `sycl_kernel_entry_point` attribute in the wrong
declaration location.
---
 .../SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
index 5b3cf9853173d..a87af7ca298ac 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
@@ -170,7 +170,7 @@ struct B3 {
 };
 
 // expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
-namespace bad4 [[clang::sycl_kernel_entry_point(BADKN<4>)]] {}
+namespace [[clang::sycl_kernel_entry_point(BADKN<4>)]] bad4 {}
 
 #if __cplusplus >= 202002L
 // expected-error@+2 {{'sycl_kernel_entry_point' attribute only applies to functions}}

From 0efb376c20b220cddbbcf57f0c82ae048170ffd9 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <nickdesaulniers@users.noreply.github.com>
Date: Thu, 9 Jan 2025 13:40:54 -0800
Subject: [PATCH 65/79] [libc][test] remove C++ stdlib includes (#122369)

These make cross compiling the test suite more difficult, as you need
the
sysroot to contain these headers and libraries cross compiled for your
target.
It's straightforward to stick with the corresponding C headers.
---
 libc/test/UnitTest/ExecuteFunctionUnix.cpp    | 17 ++++++++---------
 libc/test/UnitTest/FPExceptMatcher.cpp        |  7 +++----
 libc/test/UnitTest/LibcDeathTestExecutors.cpp |  2 +-
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/libc/test/UnitTest/ExecuteFunctionUnix.cpp b/libc/test/UnitTest/ExecuteFunctionUnix.cpp
index 3a657c00851c7..df71738668592 100644
--- a/libc/test/UnitTest/ExecuteFunctionUnix.cpp
+++ b/libc/test/UnitTest/ExecuteFunctionUnix.cpp
@@ -8,13 +8,13 @@
 
 #include "ExecuteFunction.h"
 #include "src/__support/macros/config.h"
-#include <cassert>
-#include <cstdlib>
-#include <cstring>
-#include <iostream>
-#include <memory>
+#include "test/UnitTest/ExecuteFunction.h" // FunctionCaller
+#include <assert.h>
 #include <poll.h>
 #include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 #include <sys/wait.h>
 #include <unistd.h>
 
@@ -35,21 +35,20 @@ int ProcessStatus::get_fatal_signal() {
 }
 
 ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) {
-  std::unique_ptr<FunctionCaller> X(func);
   int pipe_fds[2];
   if (::pipe(pipe_fds) == -1)
     return ProcessStatus::error("pipe(2) failed");
 
   // Don't copy the buffers into the child process and print twice.
-  std::cout.flush();
-  std::cerr.flush();
+  ::fflush(stderr);
+  ::fflush(stdout);
   pid_t pid = ::fork();
   if (pid == -1)
     return ProcessStatus::error("fork(2) failed");
 
   if (!pid) {
     (*func)();
-    std::exit(0);
+    ::exit(0);
   }
   ::close(pipe_fds[1]);
 
diff --git a/libc/test/UnitTest/FPExceptMatcher.cpp b/libc/test/UnitTest/FPExceptMatcher.cpp
index 37ba0a0a7abde..889f9f6f44a91 100644
--- a/libc/test/UnitTest/FPExceptMatcher.cpp
+++ b/libc/test/UnitTest/FPExceptMatcher.cpp
@@ -9,11 +9,11 @@
 #include "FPExceptMatcher.h"
 
 #include "src/__support/macros/config.h"
+#include "test/UnitTest/ExecuteFunction.h" // FunctionCaller
 #include "test/UnitTest/Test.h"
 
 #include "hdr/types/fenv_t.h"
 #include "src/__support/FPUtil/FEnvImpl.h"
-#include <memory>
 #include <setjmp.h>
 #include <signal.h>
 
@@ -37,14 +37,13 @@ static void sigfpeHandler(int sig) {
 }
 
 FPExceptMatcher::FPExceptMatcher(FunctionCaller *func) {
-  auto oldSIGFPEHandler = signal(SIGFPE, &sigfpeHandler);
-  std::unique_ptr<FunctionCaller> funcUP(func);
+  sighandler_t oldSIGFPEHandler = signal(SIGFPE, &sigfpeHandler);
 
   caughtExcept = false;
   fenv_t oldEnv;
   fputil::get_env(&oldEnv);
   if (sigsetjmp(jumpBuffer, 1) == 0)
-    funcUP->call();
+    func->call();
   // We restore the previous floating point environment after
   // the call to the function which can potentially raise SIGFPE.
   fputil::set_env(&oldEnv);
diff --git a/libc/test/UnitTest/LibcDeathTestExecutors.cpp b/libc/test/UnitTest/LibcDeathTestExecutors.cpp
index 77b0559c7acea..943e2c23c5fde 100644
--- a/libc/test/UnitTest/LibcDeathTestExecutors.cpp
+++ b/libc/test/UnitTest/LibcDeathTestExecutors.cpp
@@ -12,7 +12,7 @@
 #include "test/UnitTest/ExecuteFunction.h"
 #include "test/UnitTest/TestLogger.h"
 
-#include <cassert>
+#include <assert.h>
 
 namespace {
 constexpr unsigned TIMEOUT_MS = 10000;

From 4f42e165164ba2bfca7b87be2a533ef09e8777e0 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston@google.com>
Date: Thu, 9 Jan 2025 13:48:26 -0800
Subject: [PATCH 66/79] [hwasan] Omit tag check for null pointers (#122206)

If the pointer to be checked is statically known to be zero, the tag
check will always pass since:
1) the tag is zero
2) shadow memory for address 0 is initialized to 0 and never updated.
We can therefore elide the tag check.

We perform the elision in two places:
1) the HWASan pass
2) when lowering the CHECK_MEMACCESS intrinsic. Conceivably, the HWASan
pass may encounter a "cannot currently statically prove to be null"
pointer (and is therefore unable to omit the intrinsic) that later
optimization passes convert into a statically known-null pointer. As a
last line of defense, we perform elision here too.

This also updates the tests from
https://github.com/llvm/llvm-project/pull/122186
---
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  9 +++++++++
 .../Instrumentation/HWAddressSanitizer.cpp    | 20 +++++++++++++++----
 llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll  | 12 ++++++-----
 .../HWAddressSanitizer/zero-ptr.ll            |  4 +---
 4 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 9bec782ca8ce9..9d9d9889b3858 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -605,6 +605,15 @@ void AArch64AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
 
 void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
   Register Reg = MI.getOperand(0).getReg();
+
+  // The HWASan pass won't emit a CHECK_MEMACCESS intrinsic with a pointer
+  // statically known to be zero. However, conceivably, the HWASan pass may
+  // encounter a "cannot currently statically prove to be null" pointer (and is
+  // therefore unable to omit the intrinsic) that later optimization passes
+  // convert into a statically known-null pointer.
+  if (Reg == AArch64::XZR)
+    return;
+
   bool IsShort =
       ((MI.getOpcode() == AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES) ||
        (MI.getOpcode() ==
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 2031728c2f33d..75a19357ea1bb 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -348,7 +348,7 @@ class HWAddressSanitizer {
   bool ignoreMemIntrinsic(OptimizationRemarkEmitter &ORE, MemIntrinsic *MI);
   void instrumentMemIntrinsic(MemIntrinsic *MI);
   bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
-                           LoopInfo *LI);
+                           LoopInfo *LI, const DataLayout &DL);
   bool ignoreAccessWithoutRemark(Instruction *Inst, Value *Ptr);
   bool ignoreAccess(OptimizationRemarkEmitter &ORE, Instruction *Inst,
                     Value *Ptr);
@@ -1163,12 +1163,23 @@ void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
 }
 
 bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O,
-                                             DomTreeUpdater &DTU,
-                                             LoopInfo *LI) {
+                                             DomTreeUpdater &DTU, LoopInfo *LI,
+                                             const DataLayout &DL) {
   Value *Addr = O.getPtr();
 
   LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
 
+  // If the pointer is statically known to be zero, the tag check will pass
+  // since:
+  // 1) it has a zero tag
+  // 2) the shadow memory corresponding to address 0 is initialized to zero and
+  //    never updated.
+  // We can therefore elide the tag check.
+  llvm::KnownBits Known(DL.getPointerTypeSizeInBits(Addr->getType()));
+  llvm::computeKnownBits(Addr, Known, DL);
+  if (Known.isZero())
+    return false;
+
   if (O.MaybeMask)
     return false; // FIXME
 
@@ -1701,8 +1712,9 @@ void HWAddressSanitizer::sanitizeFunction(Function &F,
   PostDominatorTree *PDT = FAM.getCachedResult<PostDominatorTreeAnalysis>(F);
   LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(F);
   DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
+  const DataLayout &DL = F.getDataLayout();
   for (auto &Operand : OperandsToInstrument)
-    instrumentMemAccess(Operand, DTU, LI);
+    instrumentMemAccess(Operand, DTU, LI, DL);
   DTU.flush();
 
   if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
diff --git a/llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll b/llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll
index dca39fe03fb10..b2eaa31007c35 100644
--- a/llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll
+++ b/llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll
@@ -1,11 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -filetype asm -o - %s | FileCheck %s
 
-; This shows that when dereferencing a null pointer, HWASan will call
-; __hwasan_check_x4294967071_19_fixed_0_short_v2
-; (N.B. 4294967071 == 2**32 - 239 + 14 == 2**32 - X0 + XZR
+; This shows that CodeGen for AArch64 will elide the tag check when lowering
+; the HWASan memaccess intrinsic for null pointers.
 ;
-; The source was generated from llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll.
+; N.B. The HWASan pass will normally omit the memaccess intrinsic if the
+;      pointer is already statically known to be null.
+;
+; The source was generated from llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll
+; with the memaccess deliberately retained.
 
 ; ModuleID = '<stdin>'
 source_filename = "<stdin>"
@@ -25,7 +28,6 @@ define void @test_store_to_zeroptr() #0 {
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl __hwasan_check_x4294967071_19_fixed_0_short_v2
 ; CHECK-NEXT:    mov x8, xzr
 ; CHECK-NEXT:    mov w9, #42 // =0x2a
 ; CHECK-NEXT:    str x9, [x8]
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll b/llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll
index a201174df995b..95cf6f1544df0 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/zero-ptr.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -passes=hwasan -S | FileCheck %s
 ; RUN: opt < %s -passes=hwasan -hwasan-recover=0 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=ABORT-ZERO-BASED-SHADOW
 
-; This shows that HWASan will emit a memaccess check when dereferencing a null
+; This shows that HWASan omits the memaccess check when dereferencing a null
 ; pointer.
 ; The output is used as the source for llvm/test/CodeGen/AArch64/hwasan-zero-ptr.ll.
 
@@ -15,7 +15,6 @@ define void @test_store_to_zeroptr() sanitize_hwaddress {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow)
 ; CHECK-NEXT:    [[B:%.*]] = inttoptr i64 0 to ptr
-; CHECK-NEXT:    call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[B]], i32 19)
 ; CHECK-NEXT:    store i64 42, ptr [[B]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -24,7 +23,6 @@ define void @test_store_to_zeroptr() sanitize_hwaddress {
 ; ABORT-ZERO-BASED-SHADOW-NEXT:  entry:
 ; ABORT-ZERO-BASED-SHADOW-NEXT:    [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr null)
 ; ABORT-ZERO-BASED-SHADOW-NEXT:    [[B:%.*]] = inttoptr i64 0 to ptr
-; ABORT-ZERO-BASED-SHADOW-NEXT:    call void @llvm.hwasan.check.memaccess.shortgranules.fixedshadow(ptr [[B]], i32 19, i64 0)
 ; ABORT-ZERO-BASED-SHADOW-NEXT:    store i64 42, ptr [[B]], align 8
 ; ABORT-ZERO-BASED-SHADOW-NEXT:    ret void
 ;

From 7ffb691595a3108f72023ae0051487df25a85fc3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 9 Jan 2025 22:02:29 +0000
Subject: [PATCH 67/79] [VPlan] Remove dead ToRemove (NFC).

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3e3f5adf73a0f..8f6fb07b1e4f2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -588,7 +588,6 @@ static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
 /// vector extracts.
 static void legalizeAndOptimizeInductions(VPlan &Plan) {
   using namespace llvm::VPlanPatternMatch;
-  SmallVector<VPRecipeBase *> ToRemove;
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
   VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());

From 156e6051630d136b48090c0d1cb79a4457564e9d Mon Sep 17 00:00:00 2001
From: alx32 <103613512+alx32@users.noreply.github.com>
Date: Thu, 9 Jan 2025 14:14:13 -0800
Subject: [PATCH 68/79] [lld-macho] Fix branch extension thunk estimation logic
 (#120529)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch improves the linker’s ability to estimate stub reachability
in the `TextOutputSection::estimateStubsInRangeVA` function. It does so
by including thunks that have already been placed ahead of the current
call site address when calculating the threshold for direct stub calls.

Before this fix, the estimation process overlooked existing forward
thunks. This could result in some thunks not being inserted where
needed. In rare situations, particularly with large and specially
arranged codebases, this might lead to branch instructions being out of
range, causing linking errors.

Although this patch successfully addresses the problem, it is not
feasible to create a test for this issue. The specific layout and order
of thunk creation required to reproduce the corner case are too complex,
making test creation impractical.

Example error messages the issue could generate:
```
ld64.lld: error: banana.o:(symbol OUTLINED_FUNCTION_24949_3875): relocation BRANCH26 is out of range: 134547892 is not in [-134217728, 134217727]; references objc_autoreleaseReturnValue
ld64.lld: error: main.o:(symbol _main+0xc): relocation BRANCH26 is out of range: 134544132 is not in [-134217728, 134217727]; references objc_release
```
---
 lld/MachO/ConcatOutputSection.cpp | 36 +++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 6a9301f84a03e..d64816ec695ad 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -184,15 +184,43 @@ uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
     InputSection *isec = inputs[i];
     isecEnd = alignToPowerOf2(isecEnd, isec->align) + isec->getSize();
   }
-  // Estimate the address after which call sites can safely call stubs
-  // directly rather than through intermediary thunks.
+
+  // Tally up any thunks that have already been placed that have VA higher than
+  // inputs[callIdx]. First, find the index of the first thunk that is beyond
+  // the current inputs[callIdx].
+  auto itPostcallIdxThunks =
+      llvm::partition_point(thunks, [isecVA](const ConcatInputSection *t) {
+        return t->getVA() <= isecVA;
+      });
+  uint64_t existingForwardThunks = thunks.end() - itPostcallIdxThunks;
+
   uint64_t forwardBranchRange = target->forwardBranchRange;
   assert(isecEnd > forwardBranchRange &&
          "should not run thunk insertion if all code fits in jump range");
   assert(isecEnd - isecVA <= forwardBranchRange &&
          "should only finalize sections in jump range");
-  uint64_t stubsInRangeVA = isecEnd + maxPotentialThunks * target->thunkSize +
-                            in.stubs->getSize() - forwardBranchRange;
+
+  // Estimate the maximum size of the code, right before the stubs section.
+  uint64_t maxTextSize = 0;
+  // Add the size of all the inputs, including the unprocessed ones.
+  maxTextSize += isecEnd;
+
+  // Add the size of the thunks that have already been created that are ahead of
+  // inputs[callIdx]. These are already created thunks that will be interleaved
+  // with inputs[callIdx...end].
+  maxTextSize += existingForwardThunks * target->thunkSize;
+
+  // Add the size of the thunks that may be created in the future. Since
+  // 'maxPotentialThunks' overcounts, this is an estimate of the upper limit.
+  maxTextSize += maxPotentialThunks * target->thunkSize;
+
+  // Estimated maximum VA of last stub.
+  uint64_t maxVAOfLastStub = maxTextSize + in.stubs->getSize();
+
+  // Estimate the address after which call sites can safely call stubs
+  // directly rather than through intermediary thunks.
+  uint64_t stubsInRangeVA = maxVAOfLastStub - forwardBranchRange;
+
   log("thunks = " + std::to_string(thunkMap.size()) +
       ", potential = " + std::to_string(maxPotentialThunks) +
       ", stubs = " + std::to_string(in.stubs->getSize()) + ", isecVA = " +

From 3caa68a021c2f795de3df7f6ad7953556e825fab Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <nickdesaulniers@users.noreply.github.com>
Date: Thu, 9 Jan 2025 14:15:31 -0800
Subject: [PATCH 69/79] [libc][test] fix memory leak (#122378)

Looks like the smart pointer I removed was being used to free the underlying
object.

Fixes: #122369
---
 libc/test/UnitTest/ExecuteFunctionUnix.cpp | 19 +++++++++++++++----
 libc/test/UnitTest/FPExceptMatcher.cpp     |  1 +
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/libc/test/UnitTest/ExecuteFunctionUnix.cpp b/libc/test/UnitTest/ExecuteFunctionUnix.cpp
index df71738668592..b004e8c089117 100644
--- a/libc/test/UnitTest/ExecuteFunctionUnix.cpp
+++ b/libc/test/UnitTest/ExecuteFunctionUnix.cpp
@@ -36,18 +36,23 @@ int ProcessStatus::get_fatal_signal() {
 
 ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) {
   int pipe_fds[2];
-  if (::pipe(pipe_fds) == -1)
+  if (::pipe(pipe_fds) == -1) {
+    ::free(func);
     return ProcessStatus::error("pipe(2) failed");
+  }
 
   // Don't copy the buffers into the child process and print twice.
   ::fflush(stderr);
   ::fflush(stdout);
   pid_t pid = ::fork();
-  if (pid == -1)
+  if (pid == -1) {
+    ::free(func);
     return ProcessStatus::error("fork(2) failed");
+  }
 
   if (!pid) {
     (*func)();
+    ::free(func);
     ::exit(0);
   }
   ::close(pipe_fds[1]);
@@ -57,11 +62,14 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) {
   };
   // No events requested so this call will only return after the timeout or if
   // the pipes peer was closed, signaling the process exited.
-  if (::poll(&poll_fd, 1, timeout_ms) == -1)
+  if (::poll(&poll_fd, 1, timeout_ms) == -1) {
+    ::free(func);
     return ProcessStatus::error("poll(2) failed");
+  }
   // If the pipe wasn't closed by the child yet then timeout has expired.
   if (!(poll_fd.revents & POLLHUP)) {
     ::kill(pid, SIGKILL);
+    ::free(func);
     return ProcessStatus::timed_out_ps();
   }
 
@@ -69,9 +77,12 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) {
   // Wait on the pid of the subprocess here so it gets collected by the system
   // and doesn't turn into a zombie.
   pid_t status = ::waitpid(pid, &wstatus, 0);
-  if (status == -1)
+  if (status == -1) {
+    ::free(func);
     return ProcessStatus::error("waitpid(2) failed");
+  }
   assert(status == pid);
+  ::free(func);
   return {wstatus};
 }
 
diff --git a/libc/test/UnitTest/FPExceptMatcher.cpp b/libc/test/UnitTest/FPExceptMatcher.cpp
index 889f9f6f44a91..928c3df6ac48a 100644
--- a/libc/test/UnitTest/FPExceptMatcher.cpp
+++ b/libc/test/UnitTest/FPExceptMatcher.cpp
@@ -44,6 +44,7 @@ FPExceptMatcher::FPExceptMatcher(FunctionCaller *func) {
   fputil::get_env(&oldEnv);
   if (sigsetjmp(jumpBuffer, 1) == 0)
     func->call();
+  free(func);
   // We restore the previous floating point environment after
   // the call to the function which can potentially raise SIGFPE.
   fputil::set_env(&oldEnv);

From e6d061ad49ce11193bddfff8a7920b798fb7d214 Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma@quicinc.com>
Date: Thu, 9 Jan 2025 14:37:14 -0800
Subject: [PATCH 70/79] [libclang/python] Add python bindings for
 PrintingPolicy (#120494)

This allows changing the way pretty-printed code is formatted.
---
 clang/bindings/python/clang/cindex.py         | 83 +++++++++++++++++++
 .../python/tests/cindex/test_cursor.py        | 14 ++++
 clang/docs/ReleaseNotes.rst                   |  2 +
 3 files changed, 99 insertions(+)

diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py
index b6e0d71c1ae1b..7caf0bbfd722a 100644
--- a/clang/bindings/python/clang/cindex.py
+++ b/clang/bindings/python/clang/cindex.py
@@ -1770,6 +1770,16 @@ def spelling(self):
 
         return self._spelling
 
+    def pretty_printed(self, policy):
+        """
+        Pretty print declarations.
+        Parameters:
+        policy -- The policy to control the entities being printed.
+        """
+        return _CXString.from_result(
+            conf.lib.clang_getCursorPrettyPrinted(self, policy)
+        )
+
     @property
     def displayname(self):
         """
@@ -3699,6 +3709,72 @@ def write_main_file_to_stdout(self):
         conf.lib.clang_CXRewriter_writeMainFileToStdOut(self)
 
 
+class PrintingPolicyProperty(BaseEnumeration):
+
+    """
+    A PrintingPolicyProperty identifies a property of a PrintingPolicy.
+    """
+
+    Indentation = 0
+    SuppressSpecifiers = 1
+    SuppressTagKeyword = 2
+    IncludeTagDefinition = 3
+    SuppressScope = 4
+    SuppressUnwrittenScope = 5
+    SuppressInitializers = 6
+    ConstantArraySizeAsWritten = 7
+    AnonymousTagLocations = 8
+    SuppressStrongLifetime = 9
+    SuppressLifetimeQualifiers = 10
+    SuppressTemplateArgsInCXXConstructors = 11
+    Bool = 12
+    Restrict = 13
+    Alignof = 14
+    UnderscoreAlignof = 15
+    UseVoidForZeroParams = 16
+    TerseOutput = 17
+    PolishForDeclaration = 18
+    Half = 19
+    MSWChar = 20
+    IncludeNewlines = 21
+    MSVCFormatting = 22
+    ConstantsAsWritten = 23
+    SuppressImplicitBase = 24
+    FullyQualifiedName = 25
+
+
+class PrintingPolicy(ClangObject):
+    """
+    The PrintingPolicy is a wrapper class around clang::PrintingPolicy
+
+    It allows specifying how declarations, expressions, and types should be
+    pretty-printed.
+    """
+
+    @staticmethod
+    def create(cursor):
+        """
+        Creates a new PrintingPolicy
+        Parameters:
+        cursor -- Any cursor for a translation unit.
+        """
+        return PrintingPolicy(conf.lib.clang_getCursorPrintingPolicy(cursor))
+
+    def __init__(self, ptr):
+        ClangObject.__init__(self, ptr)
+
+    def __del__(self):
+        conf.lib.clang_PrintingPolicy_dispose(self)
+
+    def get_property(self, property):
+        """Get a property value for the given printing policy."""
+        return conf.lib.clang_PrintingPolicy_getProperty(self, property.value)
+
+    def set_property(self, property, value):
+        """Set a property value for the given printing policy."""
+        conf.lib.clang_PrintingPolicy_setProperty(self, property.value, value)
+
+
 # Now comes the plumbing to hook up the C library.
 
 # Register callback types
@@ -3801,6 +3877,8 @@ def write_main_file_to_stdout(self):
     ("clang_getCursorExtent", [Cursor], SourceRange),
     ("clang_getCursorLexicalParent", [Cursor], Cursor),
     ("clang_getCursorLocation", [Cursor], SourceLocation),
+    ("clang_getCursorPrettyPrinted", [Cursor, PrintingPolicy], _CXString),
+    ("clang_getCursorPrintingPolicy", [Cursor], c_object_p),
     ("clang_getCursorReferenced", [Cursor], Cursor),
     ("clang_getCursorReferenceNameRange", [Cursor, c_uint, c_uint], SourceRange),
     ("clang_getCursorResultType", [Cursor], Type),
@@ -3924,6 +4002,9 @@ def write_main_file_to_stdout(self):
     ("clang_Cursor_isAnonymousRecordDecl", [Cursor], bool),
     ("clang_Cursor_isBitField", [Cursor], bool),
     ("clang_Location_isInSystemHeader", [SourceLocation], bool),
+    ("clang_PrintingPolicy_dispose", [PrintingPolicy]),
+    ("clang_PrintingPolicy_getProperty", [PrintingPolicy, c_int], c_uint),
+    ("clang_PrintingPolicy_setProperty", [PrintingPolicy, c_int, c_uint]),
     ("clang_Type_getAlignOf", [Type], c_longlong),
     ("clang_Type_getClassType", [Type], Type),
     ("clang_Type_getNumTemplateArguments", [Type], c_int),
@@ -4104,6 +4185,8 @@ def function_exists(self, name: str) -> bool:
     "FixIt",
     "Index",
     "LinkageKind",
+    "PrintingPolicy",
+    "PrintingPolicyProperty",
     "RefQualifierKind",
     "SourceLocation",
     "SourceRange",
diff --git a/clang/bindings/python/tests/cindex/test_cursor.py b/clang/bindings/python/tests/cindex/test_cursor.py
index 4d989a7421e79..c6aa65ce3c29f 100644
--- a/clang/bindings/python/tests/cindex/test_cursor.py
+++ b/clang/bindings/python/tests/cindex/test_cursor.py
@@ -5,6 +5,8 @@
     BinaryOperator,
     Config,
     CursorKind,
+    PrintingPolicy,
+    PrintingPolicyProperty,
     StorageClass,
     TemplateArgumentKind,
     TranslationUnit,
@@ -981,3 +983,15 @@ def test_from_result_null(self):
     def test_from_cursor_result_null(self):
         tu = get_tu("")
         self.assertEqual(tu.cursor.semantic_parent, None)
+
+    def test_pretty_print(self):
+        tu = get_tu("struct X { int x; }; void f(bool x) { }", lang="cpp")
+        f = get_cursor(tu, "f")
+
+        self.assertEqual(f.displayname, "f(bool)")
+        pp = PrintingPolicy.create(f)
+        self.assertEqual(pp.get_property(PrintingPolicyProperty.Bool), True)
+        self.assertEqual(f.pretty_printed(pp), "void f(bool x) {\n}\n")
+        pp.set_property(PrintingPolicyProperty.Bool, False)
+        self.assertEqual(pp.get_property(PrintingPolicyProperty.Bool), False)
+        self.assertEqual(f.pretty_printed(pp), "void f(_Bool x) {\n}\n")
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5ac886856c539..07a1a4195427d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1298,6 +1298,8 @@ Sanitizers
 Python Binding Changes
 ----------------------
 - Fixed an issue that led to crashes when calling ``Type.get_exception_specification_kind``.
+- Added bindings for ``clang_getCursorPrettyPrinted`` and related functions,
+  which allow changing the formatting of pretty-printed code.
 - Added binding for ``clang_Cursor_isAnonymousRecordDecl``, which allows checking if
   a declaration is an anonymous union or anonymous struct.
 

From 9426fdd4cbd6812b69c218b865f184cb25342be4 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <nickdesaulniers@users.noreply.github.com>
Date: Thu, 9 Jan 2025 14:46:52 -0800
Subject: [PATCH 71/79] [libc][test] fix memory leak pt.2 (#122384)

These were created with operator new (see `Test::createCallable`), so operator
delete should be used instead of free().

Fixes: #122369
Fixes: #122378
---
 libc/test/UnitTest/ExecuteFunctionUnix.cpp | 14 +++++++-------
 libc/test/UnitTest/FPExceptMatcher.cpp     |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libc/test/UnitTest/ExecuteFunctionUnix.cpp b/libc/test/UnitTest/ExecuteFunctionUnix.cpp
index b004e8c089117..c90d7e0cb9ff1 100644
--- a/libc/test/UnitTest/ExecuteFunctionUnix.cpp
+++ b/libc/test/UnitTest/ExecuteFunctionUnix.cpp
@@ -37,7 +37,7 @@ int ProcessStatus::get_fatal_signal() {
 ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) {
   int pipe_fds[2];
   if (::pipe(pipe_fds) == -1) {
-    ::free(func);
+    delete func;
     return ProcessStatus::error("pipe(2) failed");
   }
 
@@ -46,13 +46,13 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) {
   ::fflush(stdout);
   pid_t pid = ::fork();
   if (pid == -1) {
-    ::free(func);
+    delete func;
     return ProcessStatus::error("fork(2) failed");
   }
 
   if (!pid) {
     (*func)();
-    ::free(func);
+    delete func;
     ::exit(0);
   }
   ::close(pipe_fds[1]);
@@ -63,13 +63,13 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) {
   // No events requested so this call will only return after the timeout or if
   // the pipes peer was closed, signaling the process exited.
   if (::poll(&poll_fd, 1, timeout_ms) == -1) {
-    ::free(func);
+    delete func;
     return ProcessStatus::error("poll(2) failed");
   }
   // If the pipe wasn't closed by the child yet then timeout has expired.
   if (!(poll_fd.revents & POLLHUP)) {
     ::kill(pid, SIGKILL);
-    ::free(func);
+    delete func;
     return ProcessStatus::timed_out_ps();
   }
 
@@ -78,11 +78,11 @@ ProcessStatus invoke_in_subprocess(FunctionCaller *func, unsigned timeout_ms) {
   // and doesn't turn into a zombie.
   pid_t status = ::waitpid(pid, &wstatus, 0);
   if (status == -1) {
-    ::free(func);
+    delete func;
     return ProcessStatus::error("waitpid(2) failed");
   }
   assert(status == pid);
-  ::free(func);
+  delete func;
   return {wstatus};
 }
 
diff --git a/libc/test/UnitTest/FPExceptMatcher.cpp b/libc/test/UnitTest/FPExceptMatcher.cpp
index 928c3df6ac48a..119a06985b8f1 100644
--- a/libc/test/UnitTest/FPExceptMatcher.cpp
+++ b/libc/test/UnitTest/FPExceptMatcher.cpp
@@ -44,7 +44,7 @@ FPExceptMatcher::FPExceptMatcher(FunctionCaller *func) {
   fputil::get_env(&oldEnv);
   if (sigsetjmp(jumpBuffer, 1) == 0)
     func->call();
-  free(func);
+  delete func;
   // We restore the previous floating point environment after
   // the call to the function which can potentially raise SIGFPE.
   fputil::set_env(&oldEnv);

From ba704d59569151f1b8b6552ed22a7b840f5e6256 Mon Sep 17 00:00:00 2001
From: alx32 <103613512+alx32@users.noreply.github.com>
Date: Thu, 9 Jan 2025 15:01:06 -0800
Subject: [PATCH 72/79] [llvm-gsymutil] Address merged-funcs test
 non-determinism (#122308)

When creating a gSYM, `llvm-gsymutil` operates by default in
multi-threaded mode. If merged functions are present, determinism cannot
be guaranteed if parallel processing is enabled.

So, for our merged functions tests, we disable multi-threading during
gSYM creation.
---
 .../macho-gsym-callsite-info-dsym.yaml        |  2 +-
 .../macho-gsym-callsite-info-exe.yaml         |  2 +-
 .../macho-gsym-callsite-info-obj.test         |  2 +-
 .../macho-gsym-merged-callsites-dsym.yaml     |  4 +-
 .../ARM_AArch64/macho-merged-funcs-dwarf.yaml | 69 +++++++++----------
 5 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml
index c636afe5cb850..6d0dc7f6f3033 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml
@@ -4,7 +4,7 @@
 # RUN: split-file %s %t
 # RUN: yaml2obj %t/call_sites.dSYM.yaml -o %t/call_sites.dSYM
 
-# RUN: llvm-gsymutil --convert=%t/call_sites.dSYM --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
+# RUN: llvm-gsymutil --num-threads=1 --convert=%t/call_sites.dSYM --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
 
 # Dump the GSYM file and check the output for callsite information
 # RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-GSYM %s
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml
index 557f43d778e58..5a2ec2be3c94a 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml
@@ -4,7 +4,7 @@
 # RUN: split-file %s %t
 # RUN: yaml2obj %t/call_sites.exe.yaml -o %t/call_sites.exe
 
-# RUN: llvm-gsymutil --convert=%t/call_sites.exe --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_exe.gsym
+# RUN: llvm-gsymutil --num-threads=1 --convert=%t/call_sites.exe --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_exe.gsym
 
 # Dump the GSYM file and check the output for callsite information
 # RUN: llvm-gsymutil %t/call_sites_exe.gsym | FileCheck --check-prefix=CHECK-GSYM %s
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test
index 2d082ed6477f1..64c18669264ef 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test
@@ -2,7 +2,7 @@
 
 // Assemble the input assembly code into an object file
 // RUN: llc -enable-machine-outliner=never -mtriple arm64-apple-darwin -filetype=obj %t/call_sites.ll -o %t/call_sites.o
-// RUN: llvm-gsymutil --convert=%t/call_sites.o --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_obj.gsym
+// RUN: llvm-gsymutil --num-threads=1 --convert=%t/call_sites.o --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_obj.gsym
 
 // Dump the GSYM file and check the output for callsite information
 // RUN: llvm-gsymutil %t/call_sites_obj.gsym | FileCheck --check-prefix=CHECK-GSYM %s
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
index 4cecc79c72b4b..5001ffdeab9e2 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
@@ -3,8 +3,8 @@
 # RUN: split-file %s %t
 # RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM
 
-# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
-# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym
+# RUN: llvm-gsymutil --num-threads=1 --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
+# RUN: llvm-gsymutil --num-threads=1 --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym
 
 # Dump the GSYM file and check the output for callsite information
 # RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
index 522c576854421..6bf359cbe1ee1 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
@@ -1,11 +1,11 @@
 # RUN: yaml2obj %s -o %t.dSYM
 
 ## Verify that we don't keep merged functions by default
-# RUN: llvm-gsymutil --convert %t.dSYM --out-file=%t.default.gSYM
+# RUN: llvm-gsymutil --num-threads=1 --convert %t.dSYM --out-file=%t.default.gSYM
 # RUN: llvm-gsymutil --verify --verbose %t.default.gSYM | FileCheck --check-prefix=CHECK-GSYM-DEFAULT %s
 
 ## Verify that we keep merged functions when specyfing --merged-functions
-# RUN: llvm-gsymutil --convert %t.dSYM --out-file=%t.keep.gSYM --merged-functions
+# RUN: llvm-gsymutil --num-threads=1 --convert %t.dSYM --out-file=%t.keep.gSYM --merged-functions
 # RUN: llvm-gsymutil --verify --verbose %t.keep.gSYM | FileCheck --check-prefix=CHECK-GSYM-KEEP %s
 
 ## Note: For identical functions, the dSYM / gSYM cannot be counted on to be deterministic.
@@ -13,7 +13,7 @@
 
 
 # CHECK-GSYM-DEFAULT-NOT: Merged FunctionInfos
-# CHECK-GSYM-DEFAULT:      FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+# CHECK-GSYM-DEFAULT:      FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_03"
 
 
 # CHECK-GSYM-KEEP:      Address Table:
@@ -30,52 +30,51 @@
 # CHECK-GSYM-KEEP-NEXT: INDEX  DIRECTORY  BASENAME   PATH
 # CHECK-GSYM-KEEP-NEXT: ====== ========== ========== ==============================
 # CHECK-GSYM-KEEP-NEXT: [   0] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}}
-# CHECK-GSYM-KEEP-NEXT: [   1] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp
-# CHECK-GSYM-KEEP-NEXT: [   2] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp
-# CHECK-GSYM-KEEP-NEXT: [   3] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp
+# CHECK-GSYM-KEEP-NEXT: [   1] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp
+# CHECK-GSYM-KEEP-NEXT: [   2] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp
+# CHECK-GSYM-KEEP-NEXT: [   3] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp
 
-# CHECK-GSYM-KEEP:      FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+# CHECK-GSYM-KEEP:      FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_01"
 # CHECK-GSYM-KEEP-NEXT: LineTable:
-# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5
-# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:7
-# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:9
-# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:8
-# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:11
-# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10
-# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6
+# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:5
+# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:7
+# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:9
+# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:8
+# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:11
+# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:10
+# CHECK-GSYM-KEEP-NEXT:   0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:6
 # CHECK-GSYM-KEEP-NEXT: ++ Merged FunctionInfos[0]:
-# CHECK-GSYM-KEEP-NEXT:     [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+# CHECK-GSYM-KEEP-NEXT:     [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_02"
 # CHECK-GSYM-KEEP-NEXT:     LineTable:
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:7
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:9
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:8
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:11
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:5
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:7
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:9
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:8
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:11
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:10
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:6
 # CHECK-GSYM-KEEP-NEXT: ++ Merged FunctionInfos[1]:
-# CHECK-GSYM-KEEP-NEXT:     [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+# CHECK-GSYM-KEEP-NEXT:     [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_03"
 # CHECK-GSYM-KEEP-NEXT:     LineTable:
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:7
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:9
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:8
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:11
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10
-# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:5
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:7
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:9
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:8
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:11
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:10
+# CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:6
 
 ## Test the lookup functionality for merged functions:
 # RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 --merged-functions | FileCheck --check-prefix=CHECK-MERGED-LOOKUP %s
 # RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 | FileCheck --check-prefix=CHECK-NORMAL-LOOKUP %s
 
-#### TODO: Fix non-determinism leading that is currently worked around with `{{[1-3]}}` below.
 
 # CHECK-MERGED-LOOKUP: Found 3 functions at address 0x0000000000000248:
-# CHECK-MERGED-LOOKUP-NEXT:       0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5
-# CHECK-MERGED-LOOKUP-NEXT-NEXT:  0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5
-# CHECK-MERGED-LOOKUP-NEXT-NEXT:  0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5
+# CHECK-MERGED-LOOKUP-NEXT:       0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:5
+# CHECK-MERGED-LOOKUP-NEXT-NEXT:  0x0000000000000248: my_func_02 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:5
+# CHECK-MERGED-LOOKUP-NEXT-NEXT:  0x0000000000000248: my_func_03 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:5
  
-# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_0{{[1-3]}} @ /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:5
+# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:5
 
 
 --- !mach-o

From c1c50c7a3ef9ced4a9b01e0afd83b83d7060fff9 Mon Sep 17 00:00:00 2001
From: Thor Preimesberger <111035711+cheezeburglar@users.noreply.github.com>
Date: Thu, 9 Jan 2025 17:20:48 -0600
Subject: [PATCH 73/79] [SDPatternMatch] Add matchers m_ExtractSubvector and
 m_InsertSubvector (#120212)

Fixes #118846
---
 llvm/include/llvm/CodeGen/SDPatternMatch.h    | 12 ++++++
 .../CodeGen/SelectionDAGPatternMatchTest.cpp  | 38 +++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index fc8ef717c74f6..2ccefa33abbf8 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -514,6 +514,12 @@ m_InsertElt(const T0_P &Vec, const T1_P &Val, const T2_P &Idx) {
                                             Idx);
 }
 
+template <typename LHS, typename RHS, typename IDX>
+inline TernaryOpc_match<LHS, RHS, IDX>
+m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx) {
+  return TernaryOpc_match<LHS, RHS, IDX>(ISD::INSERT_SUBVECTOR, Base, Sub, Idx);
+}
+
 // === Binary operations ===
 template <typename LHS_P, typename RHS_P, bool Commutable = false,
           bool ExcludeChain = false>
@@ -846,6 +852,12 @@ inline BinaryOpc_match<LHS, RHS> m_ExtractElt(const LHS &Vec, const RHS &Idx) {
   return BinaryOpc_match<LHS, RHS>(ISD::EXTRACT_VECTOR_ELT, Vec, Idx);
 }
 
+template <typename LHS, typename RHS>
+inline BinaryOpc_match<LHS, RHS> m_ExtractSubvector(const LHS &Vec,
+                                                    const RHS &Idx) {
+  return BinaryOpc_match<LHS, RHS>(ISD::EXTRACT_SUBVECTOR, Vec, Idx);
+}
+
 // === Unary operations ===
 template <typename Opnd_P, bool ExcludeChain = false> struct UnaryOpc_match {
   unsigned Opcode;
diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
index a2e1e588d03de..9b759ef19efe1 100644
--- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
+++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
@@ -165,9 +165,15 @@ TEST_F(SelectionDAGPatternMatchTest, matchTernaryOp) {
   SDValue Select = DAG->getSelect(DL, MVT::i1, Cond, T, F);
 
   auto VInt32VT = EVT::getVectorVT(Context, Int32VT, 4);
+  auto SmallVInt32VT = EVT::getVectorVT(Context, Int32VT, 2);
+  auto Idx0 = DAG->getVectorIdxConstant(0, DL);
+  auto Idx3 = DAG->getVectorIdxConstant(3, DL);
   SDValue V1 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 6, VInt32VT);
   SDValue V2 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 7, VInt32VT);
+  SDValue V3 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 8, SmallVInt32VT);
   SDValue VSelect = DAG->getNode(ISD::VSELECT, DL, VInt32VT, Cond, V1, V2);
+  SDValue InsertSubvector =
+      DAG->getNode(ISD::INSERT_SUBVECTOR, DL, VInt32VT, V2, V3, Idx0);
 
   SDValue ExtractELT =
       DAG->getNode(ISD::EXTRACT_VECTOR_ELT, DL, Int32VT, V1, Op3);
@@ -209,15 +215,33 @@ TEST_F(SelectionDAGPatternMatchTest, matchTernaryOp) {
   EXPECT_TRUE(sd_match(ExtractELT, m_ExtractElt(m_Value(), m_Value())));
   EXPECT_TRUE(sd_match(ExtractELT, m_ExtractElt(m_Value(), m_ConstInt())));
   EXPECT_TRUE(sd_match(ExtractELT, m_ExtractElt(m_Value(), m_SpecificInt(1))));
+
+  EXPECT_TRUE(sd_match(InsertSubvector,
+                       m_InsertSubvector(m_Value(), m_Value(), m_Value())));
+  EXPECT_TRUE(sd_match(
+      InsertSubvector,
+      m_InsertSubvector(m_Specific(V2), m_Specific(V3), m_Specific(Idx0))));
+  EXPECT_TRUE(sd_match(
+      InsertSubvector,
+      m_InsertSubvector(m_Specific(V2), m_Specific(V3), m_SpecificInt(0))));
+  EXPECT_FALSE(sd_match(
+      InsertSubvector,
+      m_InsertSubvector(m_Specific(V2), m_Specific(V3), m_Specific(Idx3))));
+  EXPECT_FALSE(sd_match(
+      InsertSubvector,
+      m_InsertSubvector(m_Specific(V2), m_Specific(V3), m_SpecificInt(3))));
 }
 
 TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) {
   SDLoc DL;
   auto Int32VT = EVT::getIntegerVT(Context, 32);
   auto Float32VT = EVT::getFloatingPointVT(32);
+  auto BigVInt32VT = EVT::getVectorVT(Context, Int32VT, 8);
   auto VInt32VT = EVT::getVectorVT(Context, Int32VT, 4);
 
   SDValue V1 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 6, VInt32VT);
+  auto Idx0 = DAG->getVectorIdxConstant(0, DL);
+  auto Idx1 = DAG->getVectorIdxConstant(1, DL);
 
   SDValue Op0 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT);
   SDValue Op1 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 2, Int32VT);
@@ -260,6 +284,10 @@ TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) {
   SDValue SFAdd = DAG->getNode(ISD::STRICT_FADD, DL, {Float32VT, MVT::Other},
                                {DAG->getEntryNode(), Op2, Op2});
 
+  SDValue Vec = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 9, BigVInt32VT);
+  SDValue SubVec =
+      DAG->getNode(ISD::EXTRACT_SUBVECTOR, DL, VInt32VT, Vec, Idx0);
+
   SDValue InsertELT =
       DAG->getNode(ISD::INSERT_VECTOR_ELT, DL, VInt32VT, V1, Op0, Op4);
 
@@ -320,6 +348,16 @@ TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) {
   EXPECT_FALSE(sd_match(SFAdd, m_ChainedBinOp(ISD::STRICT_FADD, m_OtherVT(),
                                               m_SpecificVT(Float32VT))));
 
+  EXPECT_TRUE(sd_match(SubVec, m_ExtractSubvector(m_Value(), m_Value())));
+  EXPECT_TRUE(
+      sd_match(SubVec, m_ExtractSubvector(m_Specific(Vec), m_Specific(Idx0))));
+  EXPECT_TRUE(
+      sd_match(SubVec, m_ExtractSubvector(m_Specific(Vec), m_SpecificInt(0))));
+  EXPECT_FALSE(
+      sd_match(SubVec, m_ExtractSubvector(m_Specific(Vec), m_Specific(Idx1))));
+  EXPECT_FALSE(
+      sd_match(SubVec, m_ExtractSubvector(m_Specific(Vec), m_SpecificInt(1))));
+
   EXPECT_TRUE(
       sd_match(InsertELT, m_InsertElt(m_Value(), m_Value(), m_Value())));
   EXPECT_TRUE(

From 504f6ce0c25b5cd721622cd444e9c428f400fd73 Mon Sep 17 00:00:00 2001
From: Congcong Cai <congcongcai0907@163.com>
Date: Fri, 10 Jan 2025 07:35:50 +0800
Subject: [PATCH 74/79] [clang-tidy][NFC] clean readability-use-std-min-max
 (#122288)

1. add `static` for internal linkage functions
2. remove `clang` prefix for `QualType`
---
 .../readability/UseStdMinMaxCheck.cpp         | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp b/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp
index 9c5c2f3939c99..179173502a8d0 100644
--- a/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/UseStdMinMaxCheck.cpp
@@ -59,7 +59,7 @@ static bool maxCondition(const BinaryOperator::Opcode Op, const Expr *CondLhs,
   return false;
 }
 
-QualType getNonTemplateAlias(QualType QT) {
+static QualType getNonTemplateAlias(QualType QT) {
   while (true) {
     // cast to a TypedefType
     if (const TypedefType *TT = dyn_cast<TypedefType>(QT)) {
@@ -92,15 +92,15 @@ static std::string createReplacement(const Expr *CondLhs, const Expr *CondRhs,
   const llvm::StringRef AssignLhsStr = Lexer::getSourceText(
       Source.getExpansionRange(AssignLhs->getSourceRange()), Source, LO);
 
-  clang::QualType GlobalImplicitCastType;
-  clang::QualType LhsType = CondLhs->getType()
-                                .getCanonicalType()
-                                .getNonReferenceType()
-                                .getUnqualifiedType();
-  clang::QualType RhsType = CondRhs->getType()
-                                .getCanonicalType()
-                                .getNonReferenceType()
-                                .getUnqualifiedType();
+  QualType GlobalImplicitCastType;
+  QualType LhsType = CondLhs->getType()
+                         .getCanonicalType()
+                         .getNonReferenceType()
+                         .getUnqualifiedType();
+  QualType RhsType = CondRhs->getType()
+                         .getCanonicalType()
+                         .getNonReferenceType()
+                         .getUnqualifiedType();
   if (LhsType != RhsType) {
     GlobalImplicitCastType = getNonTemplateAlias(BO->getLHS()->getType());
   }

From a6aa9365f75c6f28c3d281c662dcdb6fb5222601 Mon Sep 17 00:00:00 2001
From: Mingming Liu <mingmingl@google.com>
Date: Thu, 9 Jan 2025 15:57:32 -0800
Subject: [PATCH 75/79] [NFC][AsmPrinter] Pass MJTI by const reference instead
 of const pointer (#122365)

The caller `AsmPrinter::emitJumpTableInfo` checks [1] `MJTI` is not a
null pointer before calling `emitJumpTableEntry` or
`emitJumpTableSizesSection`.

This patch updates callee function's signature to accept const
reference, this way it's explicit `MJTI` won't be nullptr inside the
callee.

[1]
https://github.com/llvm/llvm-project/blob/9d5299eb61a64cd4df5fefa0299b0cf8d917978f/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp#L2857
---
 llvm/include/llvm/CodeGen/AsmPrinter.h     |  4 ++--
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 18 +++++++++---------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index c9a88d7b1c015..bf491096e3c47 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -892,10 +892,10 @@ class AsmPrinter : public MachineFunctionPass {
   // Internal Implementation Details
   //===------------------------------------------------------------------===//
 
-  void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+  void emitJumpTableEntry(const MachineJumpTableInfo &MJTI,
                           const MachineBasicBlock *MBB, unsigned uid) const;
 
-  void emitJumpTableSizesSection(const MachineJumpTableInfo *MJTI,
+  void emitJumpTableSizesSection(const MachineJumpTableInfo &MJTI,
                                  const Function &F) const;
 
   void emitLLVMUsedList(const ConstantArray *InitList);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3ba45900e4569..55c1d12a6fa8f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2922,19 +2922,19 @@ void AsmPrinter::emitJumpTableInfo() {
     // Defer MCAssembler based constant folding due to a performance issue. The
     // label differences will be evaluated at write time.
     for (const MachineBasicBlock *MBB : JTBBs)
-      emitJumpTableEntry(MJTI, MBB, JTI);
+      emitJumpTableEntry(*MJTI, MBB, JTI);
   }
 
   if (EmitJumpTableSizesSection)
-    emitJumpTableSizesSection(MJTI, F);
+    emitJumpTableSizesSection(*MJTI, F);
 
   if (!JTInDiffSection)
     OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
 }
 
-void AsmPrinter::emitJumpTableSizesSection(const MachineJumpTableInfo *MJTI,
+void AsmPrinter::emitJumpTableSizesSection(const MachineJumpTableInfo &MJTI,
                                            const Function &F) const {
-  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  const std::vector<MachineJumpTableEntry> &JT = MJTI.getJumpTables();
 
   if (JT.empty())
     return;
@@ -2982,17 +2982,17 @@ void AsmPrinter::emitJumpTableSizesSection(const MachineJumpTableInfo *MJTI,
 
 /// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
 /// current stream.
-void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo &MJTI,
                                     const MachineBasicBlock *MBB,
                                     unsigned UID) const {
   assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
   const MCExpr *Value = nullptr;
-  switch (MJTI->getEntryKind()) {
+  switch (MJTI.getEntryKind()) {
   case MachineJumpTableInfo::EK_Inline:
     llvm_unreachable("Cannot emit EK_Inline jump table entry");
   case MachineJumpTableInfo::EK_Custom32:
     Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry(
-        MJTI, MBB, UID, OutContext);
+        &MJTI, MBB, UID, OutContext);
     break;
   case MachineJumpTableInfo::EK_BlockAddress:
     // EK_BlockAddress - Each entry is a plain address of block, e.g.:
@@ -3026,7 +3026,7 @@ void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
     // If the .set directive avoids relocations, this is emitted as:
     //      .set L4_5_set_123, LBB123 - LJTI1_2
     //      .word L4_5_set_123
-    if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+    if (MJTI.getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
         MAI->doesSetDirectiveSuppressReloc()) {
       Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()),
                                       OutContext);
@@ -3042,7 +3042,7 @@ void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
 
   assert(Value && "Unknown entry kind!");
 
-  unsigned EntrySize = MJTI->getEntrySize(getDataLayout());
+  unsigned EntrySize = MJTI.getEntrySize(getDataLayout());
   OutStreamer->emitValue(Value, EntrySize);
 }
 

From 5d88a84ecddab3471693e44b57a1c1f21ce14f3f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 9 Jan 2025 15:46:44 -0800
Subject: [PATCH 76/79] [RISCV] Simplify some RISCVInstrInfoC classes by
 removing arguments that never change. NFC

---
 llvm/lib/Target/RISCV/RISCVInstrInfoC.td  | 38 +++++++++++------------
 llvm/lib/Target/RISCV/RISCVInstrInfoZc.td |  2 +-
 2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index ce994206cd785..4438957b0dc90 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -259,9 +259,8 @@ class CStore_rri<bits<3> funct3, string OpcodeStr,
                  OpcodeStr, "$rs2, ${imm}(${rs1})">;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class Bcz<bits<3> funct3, string OpcodeStr,
-          RegisterClass cls>
-    : RVInst16CB<funct3, 0b01, (outs), (ins cls:$rs1, simm9_lsb0:$imm),
+class Bcz<bits<3> funct3, string OpcodeStr>
+    : RVInst16CB<funct3, 0b01, (outs), (ins GPRC:$rs1, simm9_lsb0:$imm),
                  OpcodeStr, "$rs1, $imm"> {
   let isBranch = 1;
   let isTerminator = 1;
@@ -273,9 +272,9 @@ class Bcz<bits<3> funct3, string OpcodeStr,
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class Shift_right<bits<2> funct2, string OpcodeStr, RegisterClass cls,
-                  Operand ImmOpnd>
-    : RVInst16CB<0b100, 0b01, (outs cls:$rd), (ins cls:$rs1, ImmOpnd:$imm),
+class Shift_right<bits<2> funct2, string OpcodeStr>
+    : RVInst16CB<0b100, 0b01, (outs GPRC:$rd),
+                 (ins GPRC:$rs1, uimmlog2xlennonzero:$imm),
                  OpcodeStr, "$rs1, $imm"> {
   let Constraints = "$rs1 = $rd";
   let Inst{12} = imm{5};
@@ -284,10 +283,9 @@ class Shift_right<bits<2> funct2, string OpcodeStr, RegisterClass cls,
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class CA_ALU<bits<6> funct6, bits<2> funct2, string OpcodeStr,
-             RegisterClass cls>
-    : RVInst16CA<funct6, funct2, 0b01, (outs cls:$rd_wb), (ins cls:$rd, cls:$rs2),
-                 OpcodeStr, "$rd, $rs2"> {
+class CA_ALU<bits<6> funct6, bits<2> funct2, string OpcodeStr>
+    : RVInst16CA<funct6, funct2, 0b01, (outs GPRC:$rd_wb),
+                 (ins GPRC:$rd, GPRC:$rs2), OpcodeStr, "$rd, $rs2"> {
   bits<3> rd;
   let Constraints = "$rd = $rd_wb";
   let Inst{9-7} = rd;
@@ -465,9 +463,9 @@ def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd),
   let Inst{6-2} = imm{4-0};
 }
 
-def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>,
+def C_SRLI : Shift_right<0b00, "c.srli">,
              Sched<[WriteShiftImm, ReadShiftImm]>;
-def C_SRAI : Shift_right<0b01, "c.srai", GPRC, uimmlog2xlennonzero>,
+def C_SRAI : Shift_right<0b01, "c.srai">,
              Sched<[WriteShiftImm, ReadShiftImm]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -480,19 +478,19 @@ def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), (ins GPRC:$rs1, simm6:$imm
   let Inst{6-2} = imm{4-0};
 }
 
-def C_SUB  : CA_ALU<0b100011, 0b00, "c.sub", GPRC>,
+def C_SUB  : CA_ALU<0b100011, 0b00, "c.sub">,
              Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def C_XOR  : CA_ALU<0b100011, 0b01, "c.xor", GPRC>,
+def C_XOR  : CA_ALU<0b100011, 0b01, "c.xor">,
              Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def C_OR   : CA_ALU<0b100011, 0b10, "c.or" , GPRC>,
+def C_OR   : CA_ALU<0b100011, 0b10, "c.or">,
              Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def C_AND  : CA_ALU<0b100011, 0b11, "c.and", GPRC>,
+def C_AND  : CA_ALU<0b100011, 0b11, "c.and">,
              Sched<[WriteIALU, ReadIALU, ReadIALU]>;
 
 let Predicates = [HasStdExtCOrZca, IsRV64] in {
-def C_SUBW : CA_ALU<0b100111, 0b00, "c.subw", GPRC>,
+def C_SUBW : CA_ALU<0b100111, 0b00, "c.subw">,
              Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
-def C_ADDW : CA_ALU<0b100111, 0b01, "c.addw", GPRC>,
+def C_ADDW : CA_ALU<0b100111, 0b01, "c.addw">,
              Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
 }
 
@@ -504,8 +502,8 @@ def C_J : RVInst16CJ<0b101, 0b01, (outs), (ins simm12_lsb0:$offset),
   let isBarrier=1;
 }
 
-def C_BEQZ : Bcz<0b110, "c.beqz", GPRC>, Sched<[WriteJmp, ReadJmp]>;
-def C_BNEZ : Bcz<0b111, "c.bnez", GPRC>, Sched<[WriteJmp, ReadJmp]>;
+def C_BEQZ : Bcz<0b110, "c.beqz">, Sched<[WriteJmp, ReadJmp]>;
+def C_BNEZ : Bcz<0b111, "c.bnez">, Sched<[WriteJmp, ReadJmp]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
index bff740a33c1c1..5cc16765d4ae2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
@@ -186,7 +186,7 @@ def C_ZEXT_B  : RVZcArith_r<0b11000 , "c.zext.b">,
                 Sched<[WriteIALU, ReadIALU]>;
 
 let Predicates = [HasStdExtZcb, HasStdExtZmmul] in
-def C_MUL     : CA_ALU<0b100111, 0b10, "c.mul", GPRC>,
+def C_MUL     : CA_ALU<0b100111, 0b10, "c.mul">,
                 Sched<[WriteIMul, ReadIMul, ReadIMul]>;
 
 let Predicates = [HasStdExtZcb] in {

From 24bf0e4eb63fe5eebf86f937ff41b66be78cf958 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Fri, 10 Jan 2025 01:37:24 +0100
Subject: [PATCH 77/79] [libc++] Disable _LIBCPP_NODEBUG temporarily (#122393)

This should be reverted once the crash reported in #118710 has been
analyzed.
---
 libcxx/include/__config                               | 4 +++-
 libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index ace6e1cd73e3e..2de4c009b5afd 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -1170,7 +1170,9 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_NOESCAPE
 #  endif
 
-#  define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]]
+// FIXME: Expand this to [[__gnu__::__nodebug__]] again once the testcase reported in
+// https://github.com/llvm/llvm-project/pull/118710 has been analyzed
+#  define _LIBCPP_NODEBUG
 
 #  if __has_attribute(__standalone_debug__)
 #    define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__))
diff --git a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp
index bc7c8ce7ec443..f49f3e3c615ca 100644
--- a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp
+++ b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp
@@ -27,7 +27,7 @@ class LibcxxTestModule : public clang::tidy::ClangTidyModule {
     check_factories.registerCheck<libcpp::header_exportable_declarations>("libcpp-header-exportable-declarations");
     check_factories.registerCheck<libcpp::hide_from_abi>("libcpp-hide-from-abi");
     check_factories.registerCheck<libcpp::internal_ftm_use>("libcpp-internal-ftms");
-    check_factories.registerCheck<libcpp::nodebug_on_aliases>("libcpp-nodebug-on-aliases");
+    // check_factories.registerCheck<libcpp::nodebug_on_aliases>("libcpp-nodebug-on-aliases");
     check_factories.registerCheck<libcpp::proper_version_checks>("libcpp-cpp-version-check");
     check_factories.registerCheck<libcpp::robust_against_adl_check>("libcpp-robust-against-adl");
     check_factories.registerCheck<libcpp::uglify_attributes>("libcpp-uglify-attributes");

From 04e54cc19f13af26e453c06b6a9e2bc3187e52c2 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Thu, 9 Jan 2025 19:39:12 -0500
Subject: [PATCH 78/79] [RISCV][VLOPT] Add Vector Single-Width Averaging Add
 and Subtract to isSupportedInstr (#122351)

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp   |   9 +
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 176 +++++++++++++++++++
 2 files changed, 185 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index ab73b112e6dd8..9870279ad17c7 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -963,6 +963,15 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VMV_V_I:
   case RISCV::VMV_V_X:
   case RISCV::VMV_V_V:
+  // Vector Single-Width Averaging Add and Subtract
+  case RISCV::VAADDU_VV:
+  case RISCV::VAADDU_VX:
+  case RISCV::VAADD_VV:
+  case RISCV::VAADD_VX:
+  case RISCV::VASUBU_VV:
+  case RISCV::VASUBU_VX:
+  case RISCV::VASUB_VV:
+  case RISCV::VASUB_VX:
 
   // Vector Crypto
   case RISCV::VWSLL_VI:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index da7917cae8ba6..ce79bd5d5ddcf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3595,3 +3595,179 @@ define <vscale x 4 x i32> @vmerge_vim(<vscale x 4 x i32> %a, <vscale x 4 x i1> %
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
+
+define <vscale x 4 x i32> @vaadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vaadd_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    csrwi vxrm, 0
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vaadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vaadd_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    csrwi vxrm, 0
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vaadd.vv v8, v8, v10
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vaadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vaadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vaadd_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    csrwi vxrm, 0
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vaadd.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vaadd_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    csrwi vxrm, 0
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vaadd.vx v10, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vaadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vasub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vasub_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    csrwi vxrm, 0
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vasub.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vasub_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    csrwi vxrm, 0
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vasub.vv v8, v8, v10
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vasub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vasub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vasub_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    csrwi vxrm, 0
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vasub.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vasub_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    csrwi vxrm, 0
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vasub.vx v10, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vasub.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vaaddu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vaaddu_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    csrwi vxrm, 0
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vaaddu.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vaaddu_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    csrwi vxrm, 0
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vaaddu.vv v8, v8, v10
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vaaddu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vaaddu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vaaddu_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    csrwi vxrm, 0
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vaaddu.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vaaddu_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    csrwi vxrm, 0
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vaaddu.vx v10, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vaaddu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vasubu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vasubu_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    csrwi vxrm, 0
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vasubu.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vasubu_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    csrwi vxrm, 0
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vasubu.vv v8, v8, v10
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vasubu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vasubu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vasubu_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    csrwi vxrm, 0
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vasubu.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vasubu_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    csrwi vxrm, 0
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vasubu.vx v10, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vasubu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}

From d0373dbe7c8ca8e7cd9c84e1d93c43c81a085f0c Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Thu, 9 Jan 2025 19:44:40 -0500
Subject: [PATCH 79/79] [RISCV][VLOPT] Add vadc to isSupportedInstr (#122345)

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp   |  4 ++
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 60 ++++++++++++++++++++
 2 files changed, 64 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 9870279ad17c7..9338e0a1c8741 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -948,6 +948,10 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VMERGE_VIM:
   case RISCV::VMERGE_VVM:
   case RISCV::VMERGE_VXM:
+  // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+  case RISCV::VADC_VIM:
+  case RISCV::VADC_VVM:
+  case RISCV::VADC_VXM:
   // Vector Widening Integer Multiply-Add Instructions
   case RISCV::VWMACCU_VV:
   case RISCV::VWMACCU_VX:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index ce79bd5d5ddcf..ce94e1c193645 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3596,6 +3596,66 @@ define <vscale x 4 x i32> @vmerge_vim(<vscale x 4 x i32> %a, <vscale x 4 x i1> %
   ret <vscale x 4 x i32> %2
 }
 
+define <vscale x 4 x i32> @vadc_vvm(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vadc_vvm:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadc.vvm v8, v8, v10, v0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vadc_vvm:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vadc.vvm v8, v8, v10, v0
+; VLOPT-NEXT:    vadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vadc.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %c, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vadc_vxm(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i1> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vadc_vxm:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadc.vxm v8, v8, a0, v0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vadc_vxm:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vadc.vxm v8, v8, a0, v0
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vadc.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i1> %c, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vadc_vim(<vscale x 4 x i32> %a, <vscale x 4 x i1> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vadc_vim:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadc.vim v8, v8, 9, v0
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vadc_vim:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vadc.vim v8, v8, 9, v0
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vadc.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 9, <vscale x 4 x i1> %c, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
 define <vscale x 4 x i32> @vaadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vaadd_vv:
 ; NOVLOPT:       # %bb.0: