From d28a4f1fc02dc34a87fa22af0a053e8f1e7f6cea Mon Sep 17 00:00:00 2001 From: lonely eagle <2020382038@qq.com> Date: Fri, 17 Jan 2025 21:21:41 +0800 Subject: [PATCH 01/88] [mlir][affine]introducing new symbol rules that the result of a `Pure` operation that whose operands are valid symbolic identifiers (#118478) Introduce a new symbol rule: the result of a `Pure` operation whose operands are valid symbolic identifiers is itself a valid symbol. --- mlir/docs/Dialects/Affine.md | 4 +- mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 12 ++- .../SuperVectorize/vectorize_reduction.mlir | 6 +- mlir/test/Dialect/Affine/invalid.mlir | 44 ---------- mlir/test/Dialect/Affine/ops.mlir | 85 +++++++++++++++++++ mlir/test/Dialect/GPU/transform-gpu.mlir | 80 ++++++++--------- .../Linalg/convert-conv2d-to-img2col.mlir | 24 +++--- mlir/test/Dialect/Linalg/tile-indexed.mlir | 10 +-- .../Dialect/Linalg/transform-op-split.mlir | 4 +- .../TilingInterface/tile-using-interface.mlir | 6 +- .../TilingInterface/tile-using-scfforall.mlir | 6 +- 11 files changed, 162 insertions(+), 119 deletions(-) diff --git a/mlir/docs/Dialects/Affine.md b/mlir/docs/Dialects/Affine.md index bfcbbf5bb3b13..0b6d7747e8a6f 100644 --- a/mlir/docs/Dialects/Affine.md +++ b/mlir/docs/Dialects/Affine.md @@ -69,9 +69,7 @@ immediately enclosed by the latter), 3. a value that dominates the `AffineScope` op enclosing the value's use, 4. the result of a constant operation, -5. the result of an -[`affine.apply` operation](#affineapply-mliraffineapplyop) that recursively takes as -arguments any valid symbolic identifiers, or +5. the result of a `Pure` operation whose operands are valid symbolic identifiers, or 6. 
the result of a [`dim` operation](MemRef.md/#memrefdim-mlirmemrefdimop) on either a memref that is an argument to a `AffineScope` op or a memref where the corresponding diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index aa2c2041f411f..147f5dd7a24b6 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -410,7 +410,8 @@ bool mlir::affine::isValidSymbol(Value value) { /// A value can be used as a symbol for `region` iff it meets one of the /// following conditions: /// *) It is a constant. -/// *) It is the result of an affine apply operation with symbol arguments. +/// *) It is a result of a `Pure` operation whose operands are valid symbolic +/// identifiers. /// *) It is a result of the dim op on a memref whose corresponding size is /// a valid symbol. /// *) It is defined at the top level of 'region' or is its argument. @@ -443,9 +444,12 @@ bool mlir::affine::isValidSymbol(Value value, Region *region) { if (matchPattern(defOp, m_Constant(&operandCst))) return true; - // Affine apply operation is ok if all of its operands are ok. - if (auto applyOp = dyn_cast(defOp)) - return applyOp.isValidSymbol(region); + // A `Pure` operation is ok if all of its operands are valid symbols. + if (isPure(defOp) && llvm::all_of(defOp->getOperands(), [&](Value operand) { + return affine::isValidSymbol(operand, region); + })) { + return true; + } // Dim op results could be valid symbols at any level. 
if (auto dimOp = dyn_cast(defOp)) diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir index 29c42fcd50bd7..b616632a6fe24 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir @@ -638,13 +638,13 @@ func.func @vecdim_reduction_complex_ub(%in: memref<256x512xf32>, %out: memref<25 return } -// CHECK: #[[$map3:.*]] = affine_map<([[d0:.*]], [[d1:.*]]) -> ([[d0]], [[d1]] * 2)> -// CHECK: #[[$map3_sub:.*]] = affine_map<([[d0:.*]], [[d1:.*]]) -> ([[d0]] - [[d1]])> +// CHECK: #[[$map3:.*]] = affine_map<(d0, d1) -> (d0, d1 * 2)> +// CHECK: #[[$map3_sub:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)> // CHECK-LABEL: @vecdim_reduction_complex_ub // CHECK: %[[vzero:.*]] = arith.constant dense<0.000000e+00> : vector<128xf32> // CHECK: %{{.*}} = affine.for %[[iv:.*]] = 0 to min #[[$map3]](%[[M:.*]], %[[N:.*]]) step 128 iter_args(%[[red_iter:.*]] = {{.*}}) -> (vector<128xf32>) { // CHECK: %[[ub:.*]] = affine.min #[[$map3]](%[[M]], %[[N]]) -// CHECK: %[[elems_left:.*]] = affine.apply #[[$map3_sub]](%[[ub]], %[[iv]]) +// CHECK: %[[elems_left:.*]] = affine.apply #[[$map3_sub]](%[[iv]])[%[[ub]]] // CHECK: %[[mask:.*]] = vector.create_mask %[[elems_left]] : vector<128xi1> // CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xf32>, vector<128xf32> // CHECK: %[[select:.*]] = arith.select %[[mask]], %[[ld]], %[[vzero]] : vector<128xi1>, vector<128xf32> diff --git a/mlir/test/Dialect/Affine/invalid.mlir b/mlir/test/Dialect/Affine/invalid.mlir index 1539b4f484827..44e484b9ba598 100644 --- a/mlir/test/Dialect/Affine/invalid.mlir +++ b/mlir/test/Dialect/Affine/invalid.mlir @@ -20,36 +20,6 @@ func.func @affine_apply_resul_non_index(%arg0 : index) { return } -// ----- - -#map = affine_map<(d0)[s0] -> (d0 + s0)> - -func.func @affine_for_lower_bound_invalid_dim(%arg : index) { - affine.for %n0 = 0 to 7 { 
- %dim = arith.addi %arg, %arg : index - - // expected-error@+1 {{operand cannot be used as a dimension id}} - affine.for %n1 = 0 to #map(%dim)[%arg] { - } - } - return -} - -// ----- - -#map = affine_map<(d0)[s0] -> (d0 + s0)> - -func.func @affine_for_upper_bound_invalid_dim(%arg : index) { - affine.for %n0 = 0 to 7 { - %dim = arith.addi %arg, %arg : index - - // expected-error@+1 {{operand cannot be used as a dimension id}} - affine.for %n1 = #map(%dim)[%arg] to 7 { - } - } - return -} - // ----- func.func @affine_load_invalid_dim(%M : memref<10xi32>) { "unknown"() ({ @@ -93,20 +63,6 @@ func.func @affine_for_upper_bound_invalid_sym() { #set0 = affine_set<(i)[N] : (i >= 0, N - i >= 0)> -func.func @affine_if_invalid_dim(%arg : index) { - affine.for %n0 = 0 to 7 { - %dim = arith.addi %arg, %arg : index - - // expected-error@+1 {{operand cannot be used as a dimension id}} - affine.if #set0(%dim)[%n0] {} - } - return -} - -// ----- - -#set0 = affine_set<(i)[N] : (i >= 0, N - i >= 0)> - func.func @affine_if_invalid_sym() { affine.for %i0 = 0 to 7 { // expected-error@+1 {{operand cannot be used as a symbol}} diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir index c6bfb688db1c1..e3721806989bb 100644 --- a/mlir/test/Dialect/Affine/ops.mlir +++ b/mlir/test/Dialect/Affine/ops.mlir @@ -324,3 +324,88 @@ module attributes {gpu.container_module} { // CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 { // CHECK: } // CHECK: gpu.return + +// ----- + +#map = affine_map<()[s0] -> (s0 mod 32)> + +// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 mod 32)> + +// CHECK-LABEL: gpu.func @affine_thread_id + +module { + gpu.module @gpu { + gpu.func @affine_thread_id(%arg0: memref) kernel { + %c3 = arith.constant 3 : index + %dim = memref.dim %arg0, %c3 : memref + %c0 = arith.constant 0 : index + affine.for %arg3 = %c0 to %dim step 32 { + %thread_id_x = gpu.thread_id x + %0 = affine.apply #map()[%thread_id_x] + %c128 = arith.constant 128 
: index + affine.for %arg4 = %0 to %c128 step 8 { + %c32 = arith.constant 32 : index + } + } + gpu.return + } + } +} + +// CHECK-SAME: (%[[VAL_0:.*]]: memref) kernel { +// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 { +// CHECK: %[[VAL_5:.*]] = gpu.thread_id x +// CHECK: %[[VAL_6:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[VAL_5]]] +// CHECK: %[[VAL_7:.*]] = arith.constant 128 : index +// CHECK: affine.for %{{.*}} = %[[VAL_6]] to %[[VAL_7]] step 8 { + +// ----- + +#map = affine_map<(d0)[s0] -> (d0 + s0)> + +// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> + +// CHECK-LABEL: func @arith_add_vaild_symbol_upper_bound + +func.func @arith_add_vaild_symbol_upper_bound(%arg : index) { + affine.for %n0 = 0 to 7 { + %dim = arith.addi %arg, %arg : index + affine.for %n1 = 0 to #map(%dim)[%arg] { + } + } + return +} + +// CHECK-SAME: %[[VAL_0:.*]]: index) { +// CHECK: affine.for %[[VAL_1:.*]] = 0 to 7 { +// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_0]] : index +// CHECK: affine.for %[[VAL_3:.*]] = 0 to #[[$ATTR_0]](%[[VAL_2]]){{\[}}%[[VAL_0]]] { +// CHECK: } +// CHECK: } + +// ----- + +#map = affine_map<(d0)[s0] -> (d0 + s0)> + +// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> + +// CHECK-LABEL: func @arith_add_vaild_symbol_lower_bound + +func.func @arith_add_vaild_symbol_lower_bound(%arg : index) { + affine.for %n0 = 0 to 7 { + %dim = arith.addi %arg, %arg : index + affine.for %n1 = #map(%dim)[%arg] to 7 { + } + } + return +} + +// CHECK-SAME: %[[VAL_0:.*]]: index) { +// CHECK: affine.for %[[VAL_1:.*]] = 0 to 7 { +// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_0]] : index +// CHECK: affine.for %[[VAL_3:.*]] = #[[$ATTR_0]](%[[VAL_2]]){{\[}}%[[VAL_0]]] to 7 { +// CHECK: } +// CHECK: } diff --git a/mlir/test/Dialect/GPU/transform-gpu.mlir 
b/mlir/test/Dialect/GPU/transform-gpu.mlir index 72572c6a38de1..0a5c85336831a 100644 --- a/mlir/test/Dialect/GPU/transform-gpu.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu.mlir @@ -43,7 +43,7 @@ module attributes {transform.with_named_sequence} { !type = memref<2 x 32 x f32> !type1d = memref<32 x f32> -// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 128)> +// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 128)> // CHECK-LABEL: func.func @warpgroup_3d( // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32> @@ -61,7 +61,7 @@ func.func @warpgroup_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream // CHECK: gpu.launch // CHECK: %[[TIDX:.*]] = gpu.thread_id x // CHECK: %[[TIDY:.*]] = gpu.thread_id y -// CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]](%[[TIDX]]) +// CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]] // CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C384]] : index // CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C1]] : index // CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1 @@ -95,7 +95,7 @@ module attributes {transform.with_named_sequence} { !type = memref<2 x 32 x f32> !type1d = memref<32 x f32> -// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 16)> +// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 16)> // CHECK-LABEL: func.func @warp_3d( // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32> @@ -114,7 +114,7 @@ func.func @warp_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !g // CHECK: gpu.launch // CHECK: %[[TIDX:.*]] = gpu.thread_id x // CHECK: %[[TIDY:.*]] = gpu.thread_id y -// CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]](%[[TIDX]]) +// CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]] // CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C32]] : index // CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C3]] : index // CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1 @@ -354,9 +354,9 @@ module attributes 
{transform.with_named_sequence} { !type = memref<2 x 32 x f32> !type1d = memref<32 x f32> -// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)> -// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 32) floordiv 128) mod 2)> -// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<(d0, d1, d2) -> (d2 + ((d0 + d1 * 32) floordiv 128) floordiv 2)> +// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)> +// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 32) floordiv 128) mod 2)> +// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<()[s0, s1, s2] -> (s2 + ((s0 + s1 * 32) floordiv 128) floordiv 2)> // CHECK-LABEL: func.func @warpgroup_linear( // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32> @@ -376,9 +376,9 @@ func.func @warpgroup_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %st // CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x // CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y // CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z -// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]]) -// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]](%[[TIDX]], %[[TIDY]]) -// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]]) +// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]] +// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]]()[%[[TIDX]], %[[TIDY]]] +// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]] // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C768]] : index // CHECK: scf.if %[[CMPLIN]] // CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]] @@ -410,9 +410,9 @@ module attributes {transform.with_named_sequence} { !type = memref<2 x 32 x f32> !type1d = memref<32 x f32> -// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)> -// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 
floordiv 32) mod 2)> -// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) floordiv 2)> +// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)> +// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) mod 2)> +// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) floordiv 2)> // CHECK-LABEL: func.func @warp_linear( // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32> @@ -432,9 +432,9 @@ func.func @warp_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream // CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x // CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y // CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z -// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]]) -// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]], %[[TIDZ]]) -// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]]) +// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]] +// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]] +// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]] // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C192]] : index // CHECK: scf.if %[[CMPLIN]] // CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]] @@ -466,12 +466,12 @@ module attributes {transform.with_named_sequence} { !type = memref<2 x 32 x f32> !type1d = memref<32 x f32> -// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 18) floordiv 32) mod 3)> -// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1) -> ((((d0 + d1 * 18) floordiv 32) mod 6) floordiv 3)> +// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 18) floordiv 32) mod 3)> +// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1] -> ((((s0 + s1 * 18) floordiv 32) mod 6) floordiv 3)> -// CHECK-DAG: 
#[[$MAPLIN:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 18)> -// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) mod 10)> -// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) floordiv 10)> +// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<()[s0, s1] -> (s0 + s1 * 18)> +// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) mod 10)> +// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) floordiv 10)> // CHECK-LABEL: func.func @map_multi_level_linear( func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !gpu.async.token) -> !type { @@ -504,9 +504,9 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3 memref.store %6, %y[%i, %j] : !type } { mapping = [#gpu.thread, #gpu.thread]} - // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]](%[[TIDX]], %[[TIDY]]) - // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]]) - // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]]) + // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]]()[%[[TIDX]], %[[TIDY]]] + // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]]] + // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]]] // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[LIN]], %[[C192]] : index // CHECK: scf.if %[[CMPLIN]] scf.forall (%i, %j, %k) in (%c3, %c2, %c1) { @@ -515,8 +515,8 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3 memref.store %8, %y[%i, %j] : !type } {mapping = [#gpu.warp, #gpu.warp, #gpu.warp] } - // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]](%[[TIDX]], %[[TIDY]]) - // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]](%[[TIDX]], %[[TIDY]]) + // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]]()[%[[TIDX]], %[[TIDY]]] + // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]]()[%[[TIDX]], %[[TIDY]]] // CHECK-DAG: %[[COND:.*]] = arith.cmpi ult, %[[LIN]], 
%[[C20]] : index // CHECK: scf.if %[[COND]] // CHECK: memref.load %{{.*}}[%[[LIDX]]] : memref<32xf32> @@ -545,9 +545,9 @@ module attributes {transform.with_named_sequence} { !type = memref<2 x 32 x f32> !type1d = memref<32 x f32> -// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 12 + d2 * 108)> -// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) mod 7)> -// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) floordiv 7)> +// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 12 + s2 * 108)> +// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) mod 7)> +// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) floordiv 7)> // CHECK-LABEL: func.func @block_linear_existing_launch( // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32> @@ -566,9 +566,9 @@ func.func @block_linear_existing_launch( // CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x // CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y // CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id z -// CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]](%[[BIDX]], %[[BIDY]], %[[BIDZ]]) -// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]], %[[BIDY]], %[[BIDZ]]) -// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]]) +// CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]] +// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]] +// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]] // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[BIDLIN]], %[[C63]] : index // CHECK: scf.if %[[CMPLIN]] // CHECK: memref.load %[[ARGX]][%[[BLX]], %[[BLY]]] @@ -600,8 +600,8 @@ module attributes {transform.with_named_sequence} { !type = memref<2 x 32 x f32> !type1d = memref<32 x f32> -// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0) -> (d0 mod 7)> -// 
CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> (d1 + d2 * 9 + d0 floordiv 7)> +// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0] -> (s0 mod 7)> +// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * 9 + s0 floordiv 7)> // CHECK-LABEL: func.func @block_linear_generate_launch( // CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32> @@ -620,8 +620,8 @@ func.func @block_linear_generate_launch( // CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x // CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y // CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id z -// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]]) -// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]]) +// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]]] +// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]] // CHECK: memref.load %[[ARGX]][%[[BLX]], %[[BLY]]] // CHECK: memref.load %[[ARGY]][%[[BLX]], %[[BLY]]] scf.forall (%i, %j) in (%c7, %c9) { @@ -647,8 +647,8 @@ module attributes {transform.with_named_sequence} { #map = affine_map<(d0) -> (d0 * 128)> #map1 = affine_map<(d0) -> (d0 * 32)> -// CHECK-DAG: #[[$MAPB:.*]] = affine_map<(d0) -> (d0 * 128)> -// CHECK-DAG: #[[$MAPW:.*]] = affine_map<(d0, d1, d2) -> (d2 * 32 + ((d0 + d1 * 4) floordiv 32) * 32)> +// CHECK-DAG: #[[$MAPB:.*]] = affine_map<()[s0] -> (s0 * 128)> +// CHECK-DAG: #[[$MAPW:.*]] = affine_map<()[s0, s1, s2] -> (s2 * 32 + ((s0 + s1 * 4) floordiv 32) * 32)> // CHECK-LABEL: func.func @simple_fill( func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> { @@ -660,14 +660,14 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> { // CHECK: gpu.launch scf.forall (%arg1) in (1) { // CHECK: %[[BIDX:.*]] = gpu.block_id x -// CHECK: %[[BLX:.*]] = affine.apply #[[$MAPB]](%[[BIDX]]) +// CHECK: %[[BLX:.*]] = affine.apply #[[$MAPB]]()[%[[BIDX]]] %0 = affine.apply #map(%arg1) %subview = memref.subview %arg0[%0] [128] [1] : memref<128xf32> to memref<128xf32, 
strided<[1], offset: ?>> scf.forall (%arg2) in (4) { // CHECK: %[[TIDX:.*]] = gpu.thread_id x // CHECK: %[[TIDY:.*]] = gpu.thread_id y // CHECK: %[[TIDZ:.*]] = gpu.thread_id z -// CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]](%[[TIDX]], %[[TIDY]], %[[TIDZ]]) +// CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]] // CHECK-NOT: scf.if // CHECK: memref.subview %{{.*}}[%[[THX]]] %1 = affine.apply #map1(%arg2) diff --git a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir index c7c846d7ecc9c..c17f20b2d03ab 100644 --- a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir +++ b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir @@ -40,9 +40,9 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[KINDEX:.+]] = linalg.index 2 : index // Compute input channel/convolved indices. -// CHECK: %[[ICINDEX:.+]] = affine.apply affine_map<(d0) -> (d0 mod 4)>(%[[KINDEX]]) -// CHECK: %[[CONVH:.+]] = affine.apply affine_map<(d0, d1) -> (d0 floordiv 14 + d1 floordiv 12)>(%[[MINDEX]], %[[KINDEX]]) -// CHECK: %[[CONVW:.+]] = affine.apply affine_map<(d0, d1) -> (d0 mod 14 + (d1 mod 12) floordiv 4)>(%[[MINDEX]], %[[KINDEX]]) +// CHECK: %[[ICINDEX:.+]] = affine.apply affine_map<()[s0] -> (s0 mod 4)>()[%[[KINDEX]]] +// CHECK: %[[CONVH:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 floordiv 14 + s1 floordiv 12)>()[%[[MINDEX]], %[[KINDEX]]] +// CHECK: %[[CONVW:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 mod 14 + (s1 mod 12) floordiv 4)>()[%[[MINDEX]], %[[KINDEX]]] // Extract from the input tensor. 
// CHECK: %[[EXTRACTED_INPUT:.+]] = tensor.extract @@ -227,9 +227,9 @@ module attributes {transform.with_named_sequence} { // CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // Im2col maps -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 floordiv 9)> -// CHECK-DAG: #[[MAP7:.+]] = affine_map<(d0, d1) -> (d0 floordiv 14 + (d1 mod 9) floordiv 3)> -// CHECK-DAG: #[[MAP8:.+]] = affine_map<(d0, d1) -> (d0 + d1 - (d0 floordiv 14) * 14 - (d1 floordiv 3) * 3)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 floordiv 9)> +// CHECK-DAG: #[[MAP7:.+]] = affine_map<()[s0, s1] -> (s0 floordiv 14 + (s1 mod 9) floordiv 3)> +// CHECK-DAG: #[[MAP8:.+]] = affine_map<()[s0, s1] -> (s0 + s1 - (s0 floordiv 14) * 14 - (s1 floordiv 3) * 3)> // CHECK-DAG: #[[LHSMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)> @@ -251,9 +251,9 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[NINDEX:.+]] = linalg.index 2 : index // Compute input channel/convolved indices. -// CHECK: %[[ICINDEX:.+]] = affine.apply #[[MAP1]](%[[KINDEX]]) -// CHECK: %[[CONVH:.+]] = affine.apply #[[MAP7]](%[[NINDEX]], %[[KINDEX]]) -// CHECK: %[[CONVW:.+]] = affine.apply #[[MAP8]](%[[NINDEX]], %[[KINDEX]]) +// CHECK: %[[ICINDEX:.+]] = affine.apply #[[MAP1]]()[%[[KINDEX]]] +// CHECK: %[[CONVH:.+]] = affine.apply #[[MAP7]]()[%[[NINDEX]], %[[KINDEX]]] +// CHECK: %[[CONVW:.+]] = affine.apply #[[MAP8]]()[%[[NINDEX]], %[[KINDEX]]] // Extract from the input tensor. // CHECK: %[[EXTRACTED_INPUT:.+]] = tensor.extract @@ -300,9 +300,9 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[KINDEX:.+]] = linalg.index 2 : index // Compute input channel/convolved indices. 
-// CHECK: %[[ICINDEX:.+]] = affine.apply affine_map<(d0) -> (d0 mod 4)>(%[[KINDEX]]) -// CHECK: %[[CONVH:.+]] = affine.apply affine_map<(d0, d1) -> (d0 floordiv 14 + d1 floordiv 12)>(%[[MINDEX]], %[[KINDEX]]) -// CHECK: %[[CONVW:.+]] = affine.apply affine_map<(d0, d1) -> (d0 mod 14 + (d1 mod 12) floordiv 4)>(%[[MINDEX]], %[[KINDEX]]) +// CHECK: %[[ICINDEX:.+]] = affine.apply affine_map<()[s0] -> (s0 mod 4)>()[%[[KINDEX]]] +// CHECK: %[[CONVH:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 floordiv 14 + s1 floordiv 12)>()[%[[MINDEX]], %[[KINDEX]]] +// CHECK: %[[CONVW:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 mod 14 + (s1 mod 12) floordiv 4)>()[%[[MINDEX]], %[[KINDEX]]] // Extract from the input tensor. // CHECK: %[[EXTRACTED_INPUT:.+]] = tensor.extract diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir index b4aa0a33bc592..d96a251b01ccb 100644 --- a/mlir/test/Dialect/Linalg/tile-indexed.mlir +++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir @@ -19,13 +19,13 @@ module attributes {transform.with_named_sequence} { } } -// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> +// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0)[s0] -> (d0 + s0)> // TILE-10n25-LABEL: func @indexed_vector // TILE-10n25: %[[C10:.*]] = arith.constant 10 : index // TILE-10n25: scf.for %[[J:.*]] = {{.*}} step %[[C10]] // TILE-10n25: linalg.generic // TILE-10n25: %[[I:.*]] = linalg.index 0 : index -// TILE-10n25: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[J]]) +// TILE-10n25: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[J]])[%[[I]]] // TILE-10n25: linalg.yield %[[NEW_I]] : index // ----- @@ -51,7 +51,7 @@ module attributes {transform.with_named_sequence} { } } -// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> +// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0)[s0] -> (d0 + s0)> // TILE-10n25-LABEL: func @indexed_matrix // TILE-10n25-DAG: %[[C25:.*]] = 
arith.constant 25 : index // TILE-10n25-DAG: %[[C10:.*]] = arith.constant 10 : index @@ -59,8 +59,8 @@ module attributes {transform.with_named_sequence} { // TILE-10n25: scf.for %[[L:.*]] = {{.*}} step %[[C25]] // TILE-10n25: linalg.generic // TILE-10n25: %[[I:.*]] = linalg.index 0 : index -// TILE-10n25: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[K]]) +// TILE-10n25: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[K]])[%[[I]]] // TILE-10n25: %[[J:.*]] = linalg.index 1 : index -// TILE-10n25: %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[J]], %[[L]]) +// TILE-10n25: %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[L]])[%[[J]]] // TILE-10n25: %[[SUM:.*]] = arith.addi %[[NEW_I]], %[[NEW_J]] : index // TILE-10n25: linalg.yield %[[SUM]] : index diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir index 68c849385ba6b..7f0ef401c8422 100644 --- a/mlir/test/Dialect/Linalg/transform-op-split.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir @@ -10,7 +10,7 @@ module attributes {transform.with_named_sequence} { func.func private @elem(%arg0: f32, %arg1: index, %arg2: index) -> f32 -// CHECK: #[[$ADD_42_MAP:.+]] = affine_map<(d0) -> (d0 + 42)> +// CHECK: #[[$ADD_42_MAP:.+]] = affine_map<()[s0] -> (s0 + 42)> // CHECK-LABEL: @one_d_static // CHECK-SAME: %[[IN:.+]]: tensor<100xf32>, %[[OUT:.+]]: tensor<100xf32> @@ -30,7 +30,7 @@ func.func @one_d_static(%arg0: tensor<100xf32>, %arg1: tensor<100xf32>) -> tenso // CHECK: ins(%[[IN_SLICE_HIGH]] // CHECK: outs(%[[OUT_SLICE_HIGH]] // CHECK: %[[IDX:.+]] = linalg.index 0 - // CHECK: affine.apply #[[$ADD_42_MAP]](%[[IDX]]) + // CHECK: affine.apply #[[$ADD_42_MAP]]()[%[[IDX]]] // CHECK: func.call @elem // CHECK: %[[RES:.+]] = tensor.insert_slice %[[RES_SLICE_HIGH]] into %[[RES_PARTIAL]][42] [58] [1] %0 = linalg.generic { diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir index 
8eb1311170c66..2d9d7e432d875 100644 --- a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir +++ b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir @@ -259,14 +259,14 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK: #[[$MAP_ADD:.+]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECK: #[[$MAP_ADD:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> // CHECK-LABEL: @indexed_semantics // CHECK: scf.for %[[I0:.+]] = %{{.*}} to %{{.*}} step %{{.*}} // CHECK: scf.for %[[I1:.+]] = %{{.*}} to %{{.*}} step %{{.*}} // CHECK: %[[INDEX0:.+]] = linalg.index 0 -// CHECK: %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX0]], %[[I0]]) +// CHECK: %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[I0]])[%[[INDEX0]]] // CHECK: %[[INDEX1:.+]] = linalg.index 1 -// CHECK: %[[INDEX1_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX1]], %[[I1]]) +// CHECK: %[[INDEX1_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[I1]])[%[[INDEX1]]] // CHECK: arith.addi %[[INDEX0_AMENDED]], %[[INDEX1_AMENDED]] // ----- diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir index 53dd0c6a2425c..745a82fc0da75 100644 --- a/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir +++ b/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir @@ -205,7 +205,7 @@ module attributes {transform.with_named_sequence} { // ----- -// CHECK: #[[$MAP_ADD:.+]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECK: #[[$MAP_ADD:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> func.func @indexed_semantics(%arg0: tensor, %arg1: tensor) -> tensor { // Check that we correctly amend "linalg.index" results. 
@@ -241,9 +241,9 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: @indexed_semantics // CHECK: scf.forall (%[[I0:.+]], %[[I1:.+]]) = // CHECK: %[[INDEX0:.+]] = linalg.index 0 -// CHECK: %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX0]], %[[I0]]) +// CHECK: %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[I0]])[%[[INDEX0]]] // CHECK: %[[INDEX1:.+]] = linalg.index 1 -// CHECK: %[[INDEX1_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX1]], %[[I1]]) +// CHECK: %[[INDEX1_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[I1]])[%[[INDEX1]]] // CHECK: arith.addi %[[INDEX0_AMENDED]], %[[INDEX1_AMENDED]] // ----- From f597d346ab6e42cbfe421b153abf7ece6b592f1d Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Fri, 17 Jan 2025 18:23:07 +0500 Subject: [PATCH 02/88] [clang][Sema] Move computing best enum types to a separate function (#120965) Move the code that computes BestType and BestPromotionType for an enum to a separate function which can be called from outside of Sema. --- clang/include/clang/AST/ASTContext.h | 7 +++ clang/lib/AST/ASTContext.cpp | 79 ++++++++++++++++++++++++++++ clang/lib/Sema/SemaDecl.cpp | 77 +++------------------------ 3 files changed, 92 insertions(+), 71 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 0e07c5d6ce8fb..4e9b961688d55 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1726,6 +1726,13 @@ class ASTContext : public RefCountedBase { QualType getEnumType(const EnumDecl *Decl) const; + /// Compute BestType and BestPromotionType for an enum based on the highest + /// number of negative and positive bits of its elements. + /// Returns true if enum width is too large. 
+ bool computeBestEnumTypes(bool IsPacked, unsigned NumNegativeBits, + unsigned NumPositiveBits, QualType &BestType, + QualType &BestPromotionType); + QualType getUnresolvedUsingType(const UnresolvedUsingTypenameDecl *Decl) const; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index d0ce4c511aedd..155dbcfcaeed3 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -5209,6 +5209,85 @@ QualType ASTContext::getEnumType(const EnumDecl *Decl) const { return QualType(newType, 0); } +bool ASTContext::computeBestEnumTypes(bool IsPacked, unsigned NumNegativeBits, + unsigned NumPositiveBits, + QualType &BestType, + QualType &BestPromotionType) { + unsigned IntWidth = Target->getIntWidth(); + unsigned CharWidth = Target->getCharWidth(); + unsigned ShortWidth = Target->getShortWidth(); + bool EnumTooLarge = false; + unsigned BestWidth; + if (NumNegativeBits) { + // If there is a negative value, figure out the smallest integer type (of + // int/long/longlong) that fits. + // If it's packed, check also if it fits a char or a short. + if (IsPacked && NumNegativeBits <= CharWidth && + NumPositiveBits < CharWidth) { + BestType = SignedCharTy; + BestWidth = CharWidth; + } else if (IsPacked && NumNegativeBits <= ShortWidth && + NumPositiveBits < ShortWidth) { + BestType = ShortTy; + BestWidth = ShortWidth; + } else if (NumNegativeBits <= IntWidth && NumPositiveBits < IntWidth) { + BestType = IntTy; + BestWidth = IntWidth; + } else { + BestWidth = Target->getLongWidth(); + + if (NumNegativeBits <= BestWidth && NumPositiveBits < BestWidth) { + BestType = LongTy; + } else { + BestWidth = Target->getLongLongWidth(); + + if (NumNegativeBits > BestWidth || NumPositiveBits >= BestWidth) + EnumTooLarge = true; + BestType = LongLongTy; + } + } + BestPromotionType = (BestWidth <= IntWidth ? IntTy : BestType); + } else { + // If there is no negative value, figure out the smallest type that fits + // all of the enumerator values. 
+ // If it's packed, check also if it fits a char or a short. + if (IsPacked && NumPositiveBits <= CharWidth) { + BestType = UnsignedCharTy; + BestPromotionType = IntTy; + BestWidth = CharWidth; + } else if (IsPacked && NumPositiveBits <= ShortWidth) { + BestType = UnsignedShortTy; + BestPromotionType = IntTy; + BestWidth = ShortWidth; + } else if (NumPositiveBits <= IntWidth) { + BestType = UnsignedIntTy; + BestWidth = IntWidth; + BestPromotionType = (NumPositiveBits == BestWidth || !LangOpts.CPlusPlus) + ? UnsignedIntTy + : IntTy; + } else if (NumPositiveBits <= (BestWidth = Target->getLongWidth())) { + BestType = UnsignedLongTy; + BestPromotionType = (NumPositiveBits == BestWidth || !LangOpts.CPlusPlus) + ? UnsignedLongTy + : LongTy; + } else { + BestWidth = Target->getLongLongWidth(); + if (NumPositiveBits > BestWidth) { + // This can happen with bit-precise integer types, but those are not + // allowed as the type for an enumerator per C23 6.7.2.2p4 and p12. + // FIXME: GCC uses __int128_t and __uint128_t for cases that fit within + // a 128-bit integer, we should consider doing the same. + EnumTooLarge = true; + } + BestType = UnsignedLongLongTy; + BestPromotionType = (NumPositiveBits == BestWidth || !LangOpts.CPlusPlus) + ? 
UnsignedLongLongTy + : LongLongTy; + } + } + return EnumTooLarge; +} + QualType ASTContext::getUnresolvedUsingType( const UnresolvedUsingTypenameDecl *Decl) const { if (Decl->TypeForDecl) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index e0dd6039810cb..4b56a4dea05e5 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -20043,10 +20043,6 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceRange BraceRange, return; } - unsigned IntWidth = Context.getTargetInfo().getIntWidth(); - unsigned CharWidth = Context.getTargetInfo().getCharWidth(); - unsigned ShortWidth = Context.getTargetInfo().getShortWidth(); - // Verify that all the values are okay, compute the size of the values, and // reverse the list. unsigned NumNegativeBits = 0; @@ -20112,73 +20108,12 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceRange BraceRange, BestPromotionType = BestType; BestWidth = Context.getIntWidth(BestType); - } - else if (NumNegativeBits) { - // If there is a negative value, figure out the smallest integer type (of - // int/long/longlong) that fits. - // If it's packed, check also if it fits a char or a short. 
- if (Packed && NumNegativeBits <= CharWidth && NumPositiveBits < CharWidth) { - BestType = Context.SignedCharTy; - BestWidth = CharWidth; - } else if (Packed && NumNegativeBits <= ShortWidth && - NumPositiveBits < ShortWidth) { - BestType = Context.ShortTy; - BestWidth = ShortWidth; - } else if (NumNegativeBits <= IntWidth && NumPositiveBits < IntWidth) { - BestType = Context.IntTy; - BestWidth = IntWidth; - } else { - BestWidth = Context.getTargetInfo().getLongWidth(); - - if (NumNegativeBits <= BestWidth && NumPositiveBits < BestWidth) { - BestType = Context.LongTy; - } else { - BestWidth = Context.getTargetInfo().getLongLongWidth(); - - if (NumNegativeBits > BestWidth || NumPositiveBits >= BestWidth) - Diag(Enum->getLocation(), diag::ext_enum_too_large); - BestType = Context.LongLongTy; - } - } - BestPromotionType = (BestWidth <= IntWidth ? Context.IntTy : BestType); } else { - // If there is no negative value, figure out the smallest type that fits - // all of the enumerator values. - // If it's packed, check also if it fits a char or a short. - if (Packed && NumPositiveBits <= CharWidth) { - BestType = Context.UnsignedCharTy; - BestPromotionType = Context.IntTy; - BestWidth = CharWidth; - } else if (Packed && NumPositiveBits <= ShortWidth) { - BestType = Context.UnsignedShortTy; - BestPromotionType = Context.IntTy; - BestWidth = ShortWidth; - } else if (NumPositiveBits <= IntWidth) { - BestType = Context.UnsignedIntTy; - BestWidth = IntWidth; - BestPromotionType - = (NumPositiveBits == BestWidth || !getLangOpts().CPlusPlus) - ? Context.UnsignedIntTy : Context.IntTy; - } else if (NumPositiveBits <= - (BestWidth = Context.getTargetInfo().getLongWidth())) { - BestType = Context.UnsignedLongTy; - BestPromotionType - = (NumPositiveBits == BestWidth || !getLangOpts().CPlusPlus) - ? 
Context.UnsignedLongTy : Context.LongTy; - } else { - BestWidth = Context.getTargetInfo().getLongLongWidth(); - if (NumPositiveBits > BestWidth) { - // This can happen with bit-precise integer types, but those are not - // allowed as the type for an enumerator per C23 6.7.2.2p4 and p12. - // FIXME: GCC uses __int128_t and __uint128_t for cases that fit within - // a 128-bit integer, we should consider doing the same. - Diag(Enum->getLocation(), diag::ext_enum_too_large); - } - BestType = Context.UnsignedLongLongTy; - BestPromotionType - = (NumPositiveBits == BestWidth || !getLangOpts().CPlusPlus) - ? Context.UnsignedLongLongTy : Context.LongLongTy; - } + bool EnumTooLarge = Context.computeBestEnumTypes( + Packed, NumNegativeBits, NumPositiveBits, BestType, BestPromotionType); + BestWidth = Context.getIntWidth(BestType); + if (EnumTooLarge) + Diag(Enum->getLocation(), diag::ext_enum_too_large); } // Loop over all of the enumerator constants, changing their types to match @@ -20210,7 +20145,7 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceRange BraceRange, // int; or, // - the enumerated type NewTy = Context.IntTy; - NewWidth = IntWidth; + NewWidth = Context.getTargetInfo().getIntWidth(); NewSign = true; } else if (ECD->getType() == BestType) { // Already the right type! From 6dcb2a09028b25f8a8cfbda486d9b87a42fd3b30 Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Fri, 17 Jan 2025 19:02:25 +0530 Subject: [PATCH 03/88] [MLIR][NVVM] Add Float to TF32 conversion Op (#123199) PR #121507 added 'cvt' intrinsics to convert float to tf32, with the valid set of rounding and saturation modes. This PR adds an NVVM Dialect Op for the same. * lit tests are added to verify the lowering to intrinsics. * Negative tests are also added to check the error-handling of invalid combinations. 
Signed-off-by: Durgadoss R --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 71 +++++++++++++++++++++ mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 40 ++++++++++++ mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir | 43 +++++++++++++ mlir/test/Target/LLVMIR/nvvmir-invalid.mlir | 32 ++++++++++ 4 files changed, 186 insertions(+) create mode 100644 mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 04042903e343e..bf3131932a56b 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -970,6 +970,77 @@ def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.share }]; } +//===----------------------------------------------------------------------===// +// NVVM Conversion Ops (for "cvt.*" family of PTX instructions) +//===----------------------------------------------------------------------===// + +// Attributes for the floating point rounding modes supported by PTX +def FPRoundingModeNone : I32EnumAttrCase<"NONE", 0, "none">; +def FPRoundingModeRN : I32EnumAttrCase<"RN", 1, "rn">; +def FPRoundingModeRM : I32EnumAttrCase<"RM", 2, "rm">; +def FPRoundingModeRP : I32EnumAttrCase<"RP", 3, "rp">; +def FPRoundingModeRZ : I32EnumAttrCase<"RZ", 4, "rz">; +def FPRoundingModeRNA : I32EnumAttrCase<"RNA", 5, "rna">; + +def FPRoundingMode : I32EnumAttr<"FPRoundingMode", "NVVM FPRoundingMode kind", + [FPRoundingModeNone, FPRoundingModeRN, FPRoundingModeRM, + FPRoundingModeRP, FPRoundingModeRZ, FPRoundingModeRNA]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} +def FPRoundingModeAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + +def SaturationModeNone : I32EnumAttrCase<"NONE", 0, "none">; +def SaturationModeFinite : I32EnumAttrCase<"SATFINITE", 1, "satfinite">; + +def SaturationMode : I32EnumAttr<"SaturationMode", "NVVM SaturationMode kind", + [SaturationModeNone, SaturationModeFinite]> 
{ + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} +def SaturationModeAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + +def NVVM_CvtFloatToTF32Op : NVVM_Op<"cvt.float.to.tf32"> { + let summary = "Convert the given float input to TF32"; + let description = [{ + This Op converts the given f32 input to tf32. + The result `res` is represented as an i32 type. + The `relu` attribute, when set, lowers to the '.relu' variant of + the cvt instruction. The `rnd` and `sat` attributes specify the + rounding and saturation modes respectively. + [For more information, see PTX ISA] + (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt) + }]; + + let hasVerifier = 1; + let results = (outs I32:$res); + let arguments = (ins + F32:$src, + DefaultValuedAttr:$rnd, + DefaultValuedAttr:$sat, + DefaultValuedAttr:$relu); + + let assemblyFormat = "$src attr-dict"; + + let extraClassDeclaration = [{ + static llvm::Intrinsic::ID getIntrinsicID(NVVM::FPRoundingMode, + NVVM::SaturationMode, + bool hasRelu); + }]; + + string llvmBuilder = [{ + auto intId = NVVM::CvtFloatToTF32Op::getIntrinsicID($rnd, $sat, $relu); + $res = createIntrinsicCall(builder, intId, {$src}); + }]; +} + +//===----------------------------------------------------------------------===// +// NVVM MMA Ops +//===----------------------------------------------------------------------===// /// Helpers to instantiate different version of wmma intrinsics. /// This matches the hierarchy used in IntrinsicsNVVM.td to define all the /// combinations of the intrinsics.
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index d8fde3e765ac4..ccb5ad05f0bf7 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -138,6 +138,26 @@ LogicalResult CpAsyncBulkTensorReduceOp::verify() { getLoc()); } +LogicalResult CvtFloatToTF32Op::verify() { + using RndMode = NVVM::FPRoundingMode; + switch (getRnd()) { + case RndMode::RNA: + if (getRelu()) + return emitError("Relu not supported with rna rounding mode."); + break; + case RndMode::RN: + case RndMode::RZ: + if (getSat() != NVVM::SaturationMode::NONE) + return emitError( + "Saturation mode not supported with rn/rz rounding modes."); + break; + default: + return emitError( + "Only {rn,rz,rna} rounding modes supported for CvtFloatToTF32Op."); + } + return success(); +} + // Given the element type of an operand and whether or not it is an accumulator, // this function returns the PTX type (`NVVM::MMATypes`) that corresponds to the // operand's element type. @@ -1163,6 +1183,26 @@ llvm::Intrinsic::ID CpAsyncBulkTensorReduceOp::getIntrinsicID( llvm_unreachable("Invalid Reduction Op for CpAsyncBulkTensorReduceOp"); } +llvm::Intrinsic::ID CvtFloatToTF32Op::getIntrinsicID(NVVM::FPRoundingMode rnd, + NVVM::SaturationMode sat, + bool hasRelu) { + using RndMode = NVVM::FPRoundingMode; + switch (rnd) { + case RndMode::RN: + return hasRelu ? llvm::Intrinsic::nvvm_f2tf32_rn_relu + : llvm::Intrinsic::nvvm_f2tf32_rn; + case RndMode::RZ: + return hasRelu ? llvm::Intrinsic::nvvm_f2tf32_rz_relu + : llvm::Intrinsic::nvvm_f2tf32_rz; + case RndMode::RNA: + return (sat == NVVM::SaturationMode::SATFINITE) + ? llvm::Intrinsic::nvvm_f2tf32_rna_satfinite + : llvm::Intrinsic::nvvm_f2tf32_rna; + default: + llvm_unreachable("Invalid RoundingMode for CvtFloatToTF32Op"); + } +} + /// Infer the result ranges for the NVVM SpecialRangeableRegisterOp that might /// have ConstantRangeAttr. 
static void nvvmInferResultRanges(Operation *op, Value result, diff --git a/mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir b/mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir new file mode 100644 index 0000000000000..90a232e4baac6 --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/cvt_tf32.mlir @@ -0,0 +1,43 @@ +// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s + +// CHECK-LABEL: @convert_float_to_tf32_rna +llvm.func @convert_float_to_tf32_rna(%src : f32) -> i32 { + // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rna(float %{{.*}}) + %res = nvvm.cvt.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode} + llvm.return %res : i32 +} + +// CHECK-LABEL: @convert_float_to_tf32_rna_sf +llvm.func @convert_float_to_tf32_rna_sf(%src : f32) -> i32 { + // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rna.satfinite(float %{{.*}}) + %res = nvvm.cvt.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} + llvm.return %res : i32 +} + +// CHECK-LABEL: @convert_float_to_tf32_rn +llvm.func @convert_float_to_tf32_rn(%src : f32) -> i32 { + // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rn(float %{{.*}}) + %res = nvvm.cvt.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode} + llvm.return %res : i32 +} + +// CHECK-LABEL: @convert_float_to_tf32_rn_relu +llvm.func @convert_float_to_tf32_rn_relu(%src : f32) -> i32 { + // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rn.relu(float %{{.*}}) + %res = nvvm.cvt.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, relu=true} + llvm.return %res : i32 +} + +// CHECK-LABEL: @convert_float_to_tf32_rz +llvm.func @convert_float_to_tf32_rz(%src : f32) -> i32 { + // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rz(float %{{.*}}) + %res = nvvm.cvt.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode} + llvm.return %res : i32 +} + +// CHECK-LABEL: @convert_float_to_tf32_rz_relu +llvm.func @convert_float_to_tf32_rz_relu(%src : f32) -> i32 { + // CHECK: %{{.*}} = call i32 @llvm.nvvm.f2tf32.rz.relu(float %{{.*}}) + %res = nvvm.cvt.float.to.tf32 %src {rnd 
= #nvvm.fp_rnd_mode, relu=true} + llvm.return %res : i32 +} diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir index 44c7126255dc4..cb08064590bc3 100644 --- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir @@ -106,3 +106,35 @@ llvm.func @tma_reduce_2d_im2col(%src : !llvm.ptr<3>, %tma_desc : !llvm.ptr, %d0 nvvm.cp.async.bulk.tensor.reduce %tma_desc, %src, box[%d0, %d1] {redKind = #nvvm.tma_redux_kind, mode = #nvvm.tma_store_mode}: !llvm.ptr, !llvm.ptr<3> llvm.return } + +// ----- + +llvm.func @convert_float_to_tf32_rna_relu(%src : f32) -> i32 { + // expected-error @below {{Relu not supported with rna rounding mode.}} + %res = nvvm.cvt.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, relu=true} + llvm.return %res : i32 +} + +// ----- + +llvm.func @convert_float_to_tf32_rn_sf(%src : f32) -> i32 { + // expected-error @below {{Saturation mode not supported with rn/rz rounding modes.}} + %res = nvvm.cvt.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} + llvm.return %res : i32 +} + +// ----- + +llvm.func @convert_float_to_tf32_rz_sf(%src : f32) -> i32 { + // expected-error @below {{Saturation mode not supported with rn/rz rounding modes.}} + %res = nvvm.cvt.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} + llvm.return %res : i32 +} + +// ----- + +llvm.func @convert_float_to_tf32_no_rnd_mode(%src : f32) -> i32 { + // expected-error @below {{Only {rn,rz,rna} rounding modes supported for CvtFloatToTF32Op.}} + %res = nvvm.cvt.float.to.tf32 %src + llvm.return %res : i32 +} From 361f363c11265c6ce599a49dd081bab606b14de8 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Fri, 17 Jan 2025 21:41:48 +0800 Subject: [PATCH 04/88] [clang-tidy]fix incorrect fix-it for the string contains a user-defined suffix (#122901) Fixed: #97243 --- .../modernize/RawStringLiteralCheck.cpp | 28 ++++++++++++++----- .../modernize/RawStringLiteralCheck.h | 2 +- 
clang-tools-extra/docs/ReleaseNotes.rst | 4 +++ .../checkers/modernize/raw-string-literal.cpp | 13 +++++++++ 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp index 7ec62f41aec01..126463ae795eb 100644 --- a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp @@ -10,6 +10,7 @@ #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Lex/Lexer.h" +#include "llvm/ADT/StringRef.h" using namespace clang::ast_matchers; @@ -136,13 +137,26 @@ void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) { void RawStringLiteralCheck::replaceWithRawStringLiteral( const MatchFinder::MatchResult &Result, const StringLiteral *Literal, - StringRef Replacement) { - CharSourceRange CharRange = Lexer::makeFileCharRange( - CharSourceRange::getTokenRange(Literal->getSourceRange()), - *Result.SourceManager, getLangOpts()); - diag(Literal->getBeginLoc(), - "escaped string literal can be written as a raw string literal") - << FixItHint::CreateReplacement(CharRange, Replacement); + std::string Replacement) { + DiagnosticBuilder Builder = + diag(Literal->getBeginLoc(), + "escaped string literal can be written as a raw string literal"); + const SourceManager &SM = *Result.SourceManager; + const CharSourceRange TokenRange = + CharSourceRange::getTokenRange(Literal->getSourceRange()); + Token T; + if (Lexer::getRawToken(Literal->getBeginLoc(), T, SM, getLangOpts())) + return; + const CharSourceRange CharRange = + Lexer::makeFileCharRange(TokenRange, SM, getLangOpts()); + if (T.hasUDSuffix()) { + const StringRef Text = Lexer::getSourceText(CharRange, SM, getLangOpts()); + const size_t UDSuffixPos = Text.find_last_of('"'); + if (UDSuffixPos == StringRef::npos) + return; + Replacement += Text.slice(UDSuffixPos + 1, 
Text.size()); + } + Builder << FixItHint::CreateReplacement(CharRange, Replacement); } } // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h index aae58ca0e98d9..6898e0624d1eb 100644 --- a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h @@ -35,7 +35,7 @@ class RawStringLiteralCheck : public ClangTidyCheck { private: void replaceWithRawStringLiteral( const ast_matchers::MatchFinder::MatchResult &Result, - const StringLiteral *Literal, StringRef Replacement); + const StringLiteral *Literal, std::string Replacement); std::string DelimiterStem; CharsBitSet DisallowedChars; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 8ba47dfc84f26..33a452f525f76 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -321,6 +321,10 @@ Changes in existing checks a false positive when only an implicit conversion happened inside an initializer list. +- Improved :doc:`modernize-raw-string-literal + ` check to fix incorrect + fix-it when the string contains a user-defined suffix. + - Improved :doc:`modernize-use-designated-initializers ` check to fix a crash when a class is declared but not defined. 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/raw-string-literal.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/raw-string-literal.cpp index ad5d450036f2f..5856b8882574a 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/raw-string-literal.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/raw-string-literal.cpp @@ -129,3 +129,16 @@ void callFn() { // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: {{.*}} can be written as a raw string literal // CHECK-FIXES: {{^}} fn(R"(foo\bar)");{{$}} } + +namespace std { +using size_t = decltype(sizeof(0)); +namespace ud { +int operator""_abc(const char *str, std::size_t len); +} // namespace ud +} // namespace std +namespace gh97243 { +using namespace std::ud; +auto UserDefinedLiteral = "foo\\bar"_abc; +// CHECK-MESSAGES: :[[@LINE-1]]:27: warning: {{.*}} can be written as a raw string literal +// CHECK-FIXES: {{^}}auto UserDefinedLiteral = R"(foo\bar)"_abc; +} // namespace gh97243 From 48d0ef1a07993139e1acf65910704255443103a5 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Fri, 17 Jan 2025 21:47:47 +0800 Subject: [PATCH 05/88] [clang-tidy][NFC] refactor modernize-raw-string-literal fix hint (#122909) --- .../modernize/RawStringLiteralCheck.cpp | 105 +++++++++++------- .../modernize/RawStringLiteralCheck.h | 4 - 2 files changed, 62 insertions(+), 47 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp index 126463ae795eb..24674a407cb36 100644 --- a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp @@ -9,8 +9,11 @@ #include "RawStringLiteralCheck.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/StringRef.h" +#include using 
namespace clang::ast_matchers; @@ -67,20 +70,6 @@ bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) { : (")" + Delimiter + R"(")")) != StringRef::npos; } -std::string asRawStringLiteral(const StringLiteral *Literal, - const std::string &DelimiterStem) { - const StringRef Bytes = Literal->getBytes(); - std::string Delimiter; - for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) { - Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I); - } - - if (Delimiter.empty()) - return (R"(R"()" + Bytes + R"lit()")lit").str(); - - return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")").str(); -} - } // namespace RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name, @@ -120,43 +109,73 @@ void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) { stringLiteral(unless(hasParent(predefinedExpr()))).bind("lit"), this); } -void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) { - const auto *Literal = Result.Nodes.getNodeAs("lit"); - if (Literal->getBeginLoc().isMacroID()) - return; - - if (containsEscapedCharacters(Result, Literal, DisallowedChars)) { - std::string Replacement = asRawStringLiteral(Literal, DelimiterStem); - if (ReplaceShorterLiterals || - Replacement.length() <= - Lexer::MeasureTokenLength(Literal->getBeginLoc(), - *Result.SourceManager, getLangOpts())) - replaceWithRawStringLiteral(Result, Literal, Replacement); - } -} - -void RawStringLiteralCheck::replaceWithRawStringLiteral( - const MatchFinder::MatchResult &Result, const StringLiteral *Literal, - std::string Replacement) { - DiagnosticBuilder Builder = - diag(Literal->getBeginLoc(), - "escaped string literal can be written as a raw string literal"); - const SourceManager &SM = *Result.SourceManager; +static std::optional +createUserDefinedSuffix(const StringLiteral *Literal, const SourceManager &SM, + const LangOptions &LangOpts) { const CharSourceRange TokenRange = 
CharSourceRange::getTokenRange(Literal->getSourceRange()); Token T; - if (Lexer::getRawToken(Literal->getBeginLoc(), T, SM, getLangOpts())) - return; + if (Lexer::getRawToken(Literal->getBeginLoc(), T, SM, LangOpts)) + return std::nullopt; const CharSourceRange CharRange = - Lexer::makeFileCharRange(TokenRange, SM, getLangOpts()); + Lexer::makeFileCharRange(TokenRange, SM, LangOpts); if (T.hasUDSuffix()) { - const StringRef Text = Lexer::getSourceText(CharRange, SM, getLangOpts()); + StringRef Text = Lexer::getSourceText(CharRange, SM, LangOpts); const size_t UDSuffixPos = Text.find_last_of('"'); if (UDSuffixPos == StringRef::npos) - return; - Replacement += Text.slice(UDSuffixPos + 1, Text.size()); + return std::nullopt; + return Text.slice(UDSuffixPos + 1, Text.size()); + } + return std::nullopt; +} + +static std::string createRawStringLiteral(const StringLiteral *Literal, + const std::string &DelimiterStem, + const SourceManager &SM, + const LangOptions &LangOpts) { + const StringRef Bytes = Literal->getBytes(); + std::string Delimiter; + for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) { + Delimiter = (I == 0) ? 
DelimiterStem : DelimiterStem + std::to_string(I); + } + + std::optional UserDefinedSuffix = + createUserDefinedSuffix(Literal, SM, LangOpts); + + if (Delimiter.empty()) + return (R"(R"()" + Bytes + R"lit()")lit" + UserDefinedSuffix.value_or("")) + .str(); + + return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")" + + UserDefinedSuffix.value_or("")) + .str(); +} + +static bool compareStringLength(StringRef Replacement, + const StringLiteral *Literal, + const SourceManager &SM, + const LangOptions &LangOpts) { + return Replacement.size() <= + Lexer::MeasureTokenLength(Literal->getBeginLoc(), SM, LangOpts); +} + +void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) { + const auto *Literal = Result.Nodes.getNodeAs("lit"); + if (Literal->getBeginLoc().isMacroID()) + return; + const SourceManager &SM = *Result.SourceManager; + const LangOptions &LangOpts = getLangOpts(); + if (containsEscapedCharacters(Result, Literal, DisallowedChars)) { + const std::string Replacement = + createRawStringLiteral(Literal, DelimiterStem, SM, LangOpts); + if (ReplaceShorterLiterals || + compareStringLength(Replacement, Literal, SM, LangOpts)) { + diag(Literal->getBeginLoc(), + "escaped string literal can be written as a raw string literal") + << FixItHint::CreateReplacement(Literal->getSourceRange(), + Replacement); + } } - Builder << FixItHint::CreateReplacement(CharRange, Replacement); } } // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h index 6898e0624d1eb..879255550dd5b 100644 --- a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h @@ -33,10 +33,6 @@ class RawStringLiteralCheck : public ClangTidyCheck { void check(const ast_matchers::MatchFinder::MatchResult &Result) override; private: - void replaceWithRawStringLiteral( - const 
ast_matchers::MatchFinder::MatchResult &Result, - const StringLiteral *Literal, std::string Replacement); - std::string DelimiterStem; CharsBitSet DisallowedChars; const bool ReplaceShorterLiterals; From 7629e01479bb1ec8b7279ec7515b3bba7e6c9e31 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 17 Jan 2025 14:05:34 +0000 Subject: [PATCH 06/88] [InstCombine,PhaseOrder] Add additional tests with align assumptions. --- .../Transforms/InstCombine/assume-align.ll | 82 ++++++++++++ .../AArch64/infer-align-from-assumption.ll | 120 ++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/AArch64/infer-align-from-assumption.ll diff --git a/llvm/test/Transforms/InstCombine/assume-align.ll b/llvm/test/Transforms/InstCombine/assume-align.ll index 47659ff8c8490..f0e0257433086 100644 --- a/llvm/test/Transforms/InstCombine/assume-align.ll +++ b/llvm/test/Transforms/InstCombine/assume-align.ll @@ -171,3 +171,85 @@ define ptr @dont_fold_assume_align_zero_of_loaded_pointer_into_align_metadata(pt call void @llvm.assume(i1 true) [ "align"(ptr %p2, i64 0) ] ret ptr %p2 } + +define ptr @redundant_assume_align_1(ptr %p) { +; CHECK-LABEL: @redundant_assume_align_1( +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i32 1) ] +; CHECK-NEXT: call void @foo(ptr [[P2]]) +; CHECK-NEXT: ret ptr [[P2]] +; + %p2 = load ptr, ptr %p + call void @llvm.assume(i1 true) [ "align"(ptr %p2, i32 1) ] + call void @foo(ptr %p2) + ret ptr %p2 +} + + +define ptr @redundant_assume_align_8_via_align_metadata(ptr %p) { +; CHECK-LABEL: @redundant_assume_align_8_via_align_metadata( +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align [[META0:![0-9]+]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i32 8) ] +; CHECK-NEXT: call void @foo(ptr [[P2]]) +; CHECK-NEXT: ret ptr [[P2]] +; + %p2 = load ptr, ptr %p, !align !{i64 8} + call void 
@llvm.assume(i1 true) [ "align"(ptr %p2, i32 8) ] + call void @foo(ptr %p2) + ret ptr %p2 +} + +define ptr @assume_align_16_via_align_metadata(ptr %p) { +; CHECK-LABEL: @assume_align_16_via_align_metadata( +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P:%.*]], align 8, !align [[META0]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i32 16) ] +; CHECK-NEXT: call void @foo(ptr [[P2]]) +; CHECK-NEXT: ret ptr [[P2]] +; + %p2 = load ptr, ptr %p, !align !{i64 8} + call void @llvm.assume(i1 true) [ "align"(ptr %p2, i32 16) ] + call void @foo(ptr %p2) + ret ptr %p2 +} + +define ptr @redundant_assume_align_8_via_align_attribute(ptr align 8 %p) { +; CHECK-LABEL: @redundant_assume_align_8_via_align_attribute( +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P:%.*]], i32 8) ] +; CHECK-NEXT: call void @foo(ptr [[P]]) +; CHECK-NEXT: ret ptr [[P]] +; + call void @llvm.assume(i1 true) [ "align"(ptr %p, i32 8) ] + call void @foo(ptr %p) + ret ptr %p +} + +define ptr @assume_align_16_via_align_attribute(ptr align 8 %p) { +; CHECK-LABEL: @assume_align_16_via_align_attribute( +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P:%.*]], i32 16) ] +; CHECK-NEXT: call void @foo(ptr [[P]]) +; CHECK-NEXT: ret ptr [[P]] +; + call void @llvm.assume(i1 true) [ "align"(ptr %p, i32 16) ] + call void @foo(ptr %p) + ret ptr %p +} + +define ptr @redundant_assume_align_8_via_asume(ptr %p) { +; CHECK-LABEL: @redundant_assume_align_8_via_asume( +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P:%.*]], i32 16) ] +; CHECK-NEXT: call void @foo(ptr [[P]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P]], i32 8) ] +; CHECK-NEXT: call void @foo(ptr [[P]]) +; CHECK-NEXT: ret ptr [[P]] +; + call void @llvm.assume(i1 true) [ "align"(ptr %p, i32 16) ] + call void @foo(ptr %p) + call void @llvm.assume(i1 true) [ "align"(ptr %p, i32 8) ] + call void @foo(ptr %p) + ret ptr %p +} + +declare void @foo(ptr) +;. 
+; CHECK: [[META0]] = !{i64 8} +;. diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/infer-align-from-assumption.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/infer-align-from-assumption.ll new file mode 100644 index 0000000000000..632e3a56aacac --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/infer-align-from-assumption.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='default' -S %s | FileCheck %s + +target triple = "arm64-apple-macosx" + +declare void @llvm.assume(i1 noundef) + +define i32 @earlycse_entry(ptr %p) { +; CHECK-LABEL: define i32 @earlycse_entry( +; CHECK-SAME: ptr nocapture [[P:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[L_I]], i64 4) ] +; CHECK-NEXT: [[L_ASSUME_ALIGNED_I_I:%.*]] = load i32, ptr [[L_I]], align 4 +; CHECK-NEXT: [[R_I_I:%.*]] = tail call i32 @swap(i32 [[L_ASSUME_ALIGNED_I_I]]) +; CHECK-NEXT: [[L_2_I:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr i8, ptr [[L_2_I]], i64 4 +; CHECK-NEXT: store ptr [[GEP_I]], ptr [[P]], align 8 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_I]], i64 4) ] +; CHECK-NEXT: [[L_ASSUME_ALIGNED_I_I2:%.*]] = load i32, ptr [[GEP_I]], align 4 +; CHECK-NEXT: [[R_I_I3:%.*]] = tail call i32 @swap(i32 [[L_ASSUME_ALIGNED_I_I2]]) +; CHECK-NEXT: [[L_2_I4:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: [[GEP_I5:%.*]] = getelementptr i8, ptr [[L_2_I4]], i64 4 +; CHECK-NEXT: store ptr [[GEP_I5]], ptr [[P]], align 8 +; CHECK-NEXT: ret i32 [[R_I_I3]] +; + %r.1 = call i32 @earlycse_fn1(ptr %p) + %r.2 = call i32 @earlycse_fn1(ptr %p) + ret i32 %r.2 +} + +define i32 @earlycse_fn1(ptr %p) { +; CHECK-LABEL: define i32 @earlycse_fn1( +; CHECK-SAME: ptr nocapture [[P:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: 
call void @llvm.assume(i1 true) [ "align"(ptr [[L]], i64 4) ] +; CHECK-NEXT: [[L_ASSUME_ALIGNED_I:%.*]] = load i32, ptr [[L]], align 4 +; CHECK-NEXT: [[R_I:%.*]] = tail call i32 @swap(i32 [[L_ASSUME_ALIGNED_I]]) +; CHECK-NEXT: [[L_2:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[L_2]], i64 4 +; CHECK-NEXT: store ptr [[GEP]], ptr [[P]], align 8 +; CHECK-NEXT: ret i32 [[R_I]] +; + %l = load ptr, ptr %p, align 8 + %r = call i32 @load_assume_aligned(ptr %l) + %l.2 = load ptr, ptr %p, align 8 + %gep = getelementptr i8, ptr %l.2, i64 4 + store ptr %gep, ptr %p, align 8 + ret i32 %r +} + +define i32 @load_assume_aligned(ptr %p) { +; CHECK-LABEL: define i32 @load_assume_aligned( +; CHECK-SAME: ptr [[P:%.*]]) local_unnamed_addr { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P]], i64 4) ] +; CHECK-NEXT: [[DOT0_COPYLOAD:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @swap(i32 [[DOT0_COPYLOAD]]) +; CHECK-NEXT: ret i32 [[TMP2]] +; + call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 4) ] + %l.assume_aligned = load i32, ptr %p, align 1 + %r = call i32 @swap(i32 %l.assume_aligned) + ret i32 %r +} + +declare i32 @swap(i32) + +define void @sroa_align_entry(ptr %p) { +; CHECK-LABEL: define void @sroa_align_entry( +; CHECK-SAME: ptr [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P]], i64 8) ] +; CHECK-NEXT: [[DOT0_COPYLOAD_I_I_I:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[DOT0_COPYLOAD_I_I_I]] to ptr +; CHECK-NEXT: store i32 0, ptr [[TMP2]], align 4 +; CHECK-NEXT: ret void +; + %a = alloca ptr, align 8 + store ptr %p, ptr %a, align 8 + %r = call ptr @sroa_fn1(ptr %a) + store i32 0, ptr %r, align 4 + ret void +} + +define ptr @sroa_fn1(ptr %p) { +; CHECK-LABEL: define ptr @sroa_fn1( +; CHECK-SAME: ptr nocapture readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: 
[[L:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[L]], i64 8) ] +; CHECK-NEXT: [[L_FN3_I_I:%.*]] = load i64, ptr [[L]], align 8 +; CHECK-NEXT: [[I_I:%.*]] = inttoptr i64 [[L_FN3_I_I]] to ptr +; CHECK-NEXT: ret ptr [[I_I]] +; + %l = load ptr, ptr %p, align 8 + %r = call ptr @sroa_fn2(ptr %l) + ret ptr %r +} + +define ptr @sroa_fn2(ptr %p) { +; CHECK-LABEL: define ptr @sroa_fn2( +; CHECK-SAME: ptr [[P:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P]], i64 8) ] +; CHECK-NEXT: [[DOT0_COPYLOAD_I_I:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[DOT0_COPYLOAD_I_I]] to ptr +; CHECK-NEXT: ret ptr [[TMP3]] +; + %r = call i64 @sroa_fn3(ptr %p) + %i = inttoptr i64 %r to ptr + ret ptr %i +} + +define i64 @sroa_fn3(ptr %0) { +; CHECK-LABEL: define i64 @sroa_fn3( +; CHECK-SAME: ptr [[TMP0:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 8) ] +; CHECK-NEXT: [[DOT0_COPYLOAD_I:%.*]] = load i64, ptr [[TMP0]], align 8 +; CHECK-NEXT: ret i64 [[DOT0_COPYLOAD_I]] +; + call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 8) ] + %l.fn3 = load i64, ptr %0, align 1 + ret i64 %l.fn3 +} From dc1ef2cc1aa2de7ef6830419de37ad93eb74d34a Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 17 Jan 2025 14:11:05 +0000 Subject: [PATCH 07/88] [lldb][DWARFASTParserClang] Don't overwrite DW_AT_object_pointer of definition with that of a declaration (#123089) In https://github.com/llvm/llvm-project/pull/122742 we will start attaching DW_AT_object_pointer to method declarations (in addition to definitions). Currently when LLDB parses a `DW_TAG_subprogram` definition, it will parse all the attributes of the declaration as well. If we have `DW_AT_object_pointer` on both, then we would overwrite the more specific attribute that we got from the defintion with the one from the specification. 
This is problematic because LLDB relies on getting the `DW_AT_name` from the `DW_AT_object_pointer`, which doesn't exist on the specification. Note GCC does attach `DW_AT_object_pointer` on declarations *and* definitions already (see https://godbolt.org/z/G1GvddY48), so there are definitely some expressions that will fail for GCC-compiled binaries. This patch will fix those cases (e.g., I would expect `TestConstThis.py` to fail with GCC). --- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 7 +- .../DWARF/DWARFASTParserClangTests.cpp | 161 ++++++++++++++++++ 2 files changed, 167 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index e2f76e88dd6f0..fb3af44abfa8d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -377,7 +377,12 @@ ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { break; case DW_AT_object_pointer: - object_pointer = form_value.Reference(); + // GetAttributes follows DW_AT_specification. + // DW_TAG_subprogram definitions and declarations may both + // have a DW_AT_object_pointer. Don't overwrite the one + // we parsed for the definition with the one from the declaration.
+ if (!object_pointer.IsValid()) + object_pointer = form_value.Reference(); break; case DW_AT_signature: diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp index f22d76b3973e5..b31f56aa372d5 100644 --- a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp +++ b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp @@ -741,3 +741,164 @@ TEST_F(DWARFASTParserClangTests, TestUniqueDWARFASTTypeMap_CppInsertMapFind) { ASSERT_EQ(type_sp, reparsed_type_sp); } + +TEST_F(DWARFASTParserClangTests, TestParseDWARFAttributes_ObjectPointer) { + // This tests the behaviour of ParsedDWARFTypeAttributes + // for DW_TAG_subprogram definitions which have a DW_AT_object_pointer + // *and* a DW_AT_specification that also has a DW_AT_object_pointer. + // We don't want the declaration DW_AT_object_pointer to overwrite the + // one from the more specific definition's. + + const char *yamldata = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_AARCH64 +DWARF: + debug_str: + - Context + - func + - this + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Code: 0x2 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Code: 0x3 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_declaration + Form: DW_FORM_flag_present + - Attribute: DW_AT_object_pointer + Form: DW_FORM_ref4 + - Attribute: DW_AT_artificial + Form: DW_FORM_flag_present + - Attribute: DW_AT_external + Form: DW_FORM_flag_present + - Code: 0x4 + Tag: DW_TAG_formal_parameter + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_artificial + Form: DW_FORM_flag_present + - Code: 0x5 + Tag: DW_TAG_subprogram + 
Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_object_pointer + Form: DW_FORM_ref4 + - Attribute: DW_AT_specification + Form: DW_FORM_ref4 + - Code: 0x6 + Tag: DW_TAG_formal_parameter + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_artificial + Form: DW_FORM_flag_present + debug_info: + - Version: 5 + UnitType: DW_UT_compile + AddrSize: 8 + Entries: + +# DW_TAG_compile_unit +# DW_AT_language [DW_FORM_data2] (DW_LANG_C_plus_plus) + + - AbbrCode: 0x1 + Values: + - Value: 0x04 + +# DW_TAG_structure_type +# DW_AT_name [DW_FORM_strp] ("Context") + + - AbbrCode: 0x2 + Values: + - Value: 0x0 + +# DW_TAG_subprogram +# DW_AT_name [DW_FORM_strp] ("func") +# DW_AT_object_pointer [DW_FORM_ref4] + - AbbrCode: 0x3 + Values: + - Value: 0x8 + - Value: 0x1 + - Value: 0x1d + - Value: 0x1 + - Value: 0x1 + +# DW_TAG_formal_parameter +# DW_AT_artificial + - AbbrCode: 0x4 + Values: + - Value: 0x1 + + - AbbrCode: 0x0 + - AbbrCode: 0x0 + +# DW_TAG_subprogram +# DW_AT_object_pointer [DW_FORM_ref4] ("this") +# DW_AT_specification [DW_FORM_ref4] ("func") + - AbbrCode: 0x5 + Values: + - Value: 0x29 + - Value: 0x14 + +# DW_TAG_formal_parameter +# DW_AT_name [DW_FORM_strp] ("this") +# DW_AT_artificial + - AbbrCode: 0x6 + Values: + - Value: 0xd + - Value: 0x1 + + - AbbrCode: 0x0 + - AbbrCode: 0x0 +... 
+)"; + YAMLModuleTester t(yamldata); + + DWARFUnit *unit = t.GetDwarfUnit(); + ASSERT_NE(unit, nullptr); + const DWARFDebugInfoEntry *cu_entry = unit->DIE().GetDIE(); + ASSERT_EQ(cu_entry->Tag(), DW_TAG_compile_unit); + ASSERT_EQ(unit->GetDWARFLanguageType(), DW_LANG_C_plus_plus); + DWARFDIE cu_die(unit, cu_entry); + + auto holder = std::make_unique("ast"); + auto &ast_ctx = *holder->GetAST(); + DWARFASTParserClangStub ast_parser(ast_ctx); + + auto context_die = cu_die.GetFirstChild(); + ASSERT_TRUE(context_die.IsValid()); + ASSERT_EQ(context_die.Tag(), DW_TAG_structure_type); + + auto subprogram_definition = context_die.GetSibling(); + ASSERT_TRUE(subprogram_definition.IsValid()); + ASSERT_EQ(subprogram_definition.Tag(), DW_TAG_subprogram); + ASSERT_FALSE(subprogram_definition.GetAttributeValueAsOptionalUnsigned( + DW_AT_external)); + + auto param_die = subprogram_definition.GetFirstChild(); + ASSERT_TRUE(param_die.IsValid()); + + ParsedDWARFTypeAttributes attrs(subprogram_definition); + EXPECT_TRUE(attrs.object_pointer.IsValid()); + EXPECT_EQ(attrs.object_pointer, param_die); +} From eb7dea8bb15a00930b676f78f3b850079e2b964c Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Fri, 17 Jan 2025 22:14:23 +0800 Subject: [PATCH 08/88] [mutation analyzer] enhance stability for `hasAnyInit` matcher (#122915) I cannot minimal produce it, but when I run clangd with `misc-const-correctness` check in a big project clangd crashed due to deref nullptr here. clang may pass a nullptr to `InitExprs` when meets some error cases. 
--- clang/lib/Analysis/ExprMutationAnalyzer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index 53b838e9ede4d..cefe64409c977 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -104,6 +104,8 @@ AST_MATCHER_P(Stmt, canResolveToExpr, const Stmt *, Inner) { AST_MATCHER_P(InitListExpr, hasAnyInit, ast_matchers::internal::Matcher, InnerMatcher) { for (const Expr *Arg : Node.inits()) { + if (Arg == nullptr) + continue; ast_matchers::internal::BoundNodesTreeBuilder Result(*Builder); if (InnerMatcher.matches(*Arg, Finder, &Result)) { *Builder = std::move(Result); From 0171e56ed0b2bb0c87c48e0895f5052986fa3cda Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Fri, 17 Jan 2025 15:14:47 +0100 Subject: [PATCH 09/88] [clang][bytecode] Fix rejecting invalid sizeof expressions (#123332) Emit the invalid note instead of nothing. --- clang/lib/AST/ByteCode/Compiler.cpp | 2 +- clang/test/AST/ByteCode/literals.cpp | 25 ++++++++++--------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 6677119d09211..fca8518575594 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -2066,7 +2066,7 @@ bool Compiler::VisitUnaryExprOrTypeTraitExpr( Size = CharUnits::One(); else { if (ArgType->isDependentType() || !ArgType->isConstantSizeType()) - return false; + return this->emitInvalid(E); if (Kind == UETT_SizeOf) Size = ASTCtx.getTypeSizeInChars(ArgType); diff --git a/clang/test/AST/ByteCode/literals.cpp b/clang/test/AST/ByteCode/literals.cpp index 3d415a93a392a..fdf1a6820e446 100644 --- a/clang/test/AST/ByteCode/literals.cpp +++ b/clang/test/AST/ByteCode/literals.cpp @@ -261,31 +261,26 @@ namespace SizeOf { } #if __cplusplus >= 201402L - constexpr int IgnoredRejected() { // ref-error {{never produces a 
constant expression}} + constexpr int IgnoredRejected() { // both-error {{never produces a constant expression}} int n = 0; sizeof(int[n++]); // both-warning {{expression result unused}} \ - // ref-note 2{{subexpression not valid in a constant expression}} + // both-note 2{{subexpression not valid in a constant expression}} return n; } - /// FIXME: This is rejected because the parameter so sizeof() is not constant. - /// produce a proper diagnostic. static_assert(IgnoredRejected() == 0, ""); // both-error {{not an integral constant expression}} \ - // ref-note {{in call to 'IgnoredRejected()'}} + // both-note {{in call to 'IgnoredRejected()'}} #endif #if __cplusplus >= 202002L /// FIXME: The following code should be accepted. - consteval int foo(int n) { // ref-error {{consteval function never produces a constant expression}} - return sizeof(int[n]); // ref-note 3{{not valid in a constant expression}} - } - constinit int var = foo(5); // ref-error {{not a constant expression}} \ - // ref-note 2{{in call to}} \ - // ref-error {{does not have a constant initializer}} \ - // ref-note {{required by 'constinit' specifier}} \ - // expected-error {{is not a constant expression}} \ - // expected-error {{does not have a constant initializer}} \ - // expected-note {{required by 'constinit' specifier}} \ + consteval int foo(int n) { // both-error {{consteval function never produces a constant expression}} + return sizeof(int[n]); // both-note 3{{not valid in a constant expression}} + } + constinit int var = foo(5); // both-error {{not a constant expression}} \ + // both-note 2{{in call to}} \ + // both-error {{does not have a constant initializer}} \ + // both-note {{required by 'constinit' specifier}} #endif }; From baa5b769f2f76baa0ce1ebfe28236dee2c761f0d Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Fri, 17 Jan 2025 22:16:32 +0800 Subject: [PATCH 10/88] [C++20] [Modules] Make module local decls visible to language linkage in the same module Close 
https://github.com/llvm/llvm-project/issues/123343 See the issue and the comments in the patch for details. --- clang/lib/Sema/SemaLookup.cpp | 5 +++++ ...ule-local-visibility-in-language-linkage.cppm | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 clang/test/Modules/module-local-visibility-in-language-linkage.cppm diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index e1171d4284c76..9d8cdc9c08525 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -1624,6 +1624,11 @@ bool Sema::isUsableModule(const Module *M) { if (!Current) return false; + // For implicit global module, the decls in the same modules with the parent + // module should be visible to the decls in the implicit global module. + if (Current->isImplicitGlobalModule()) + Current = Current->getTopLevelModule(); + // If M is the module we're parsing or M and the current module unit lives in // the same module, M should be usable. // diff --git a/clang/test/Modules/module-local-visibility-in-language-linkage.cppm b/clang/test/Modules/module-local-visibility-in-language-linkage.cppm new file mode 100644 index 0000000000000..c046aef4e7486 --- /dev/null +++ b/clang/test/Modules/module-local-visibility-in-language-linkage.cppm @@ -0,0 +1,16 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: cd %t +// +// RUN: %clang_cc1 -std=c++20 %t/m.a.cppm -emit-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/m.b.cppm -fmodule-file=m:a=%t/a.pcm -fsyntax-only -verify + +//--- m.a.cppm +export module m:a; +int a; + +//--- m.b.cppm +// expected-no-diagnostics +module m:b; +import :a; +extern "C++" int get_a() { return a; } From 48803bc8c7be25745a0e623e6753261c07281b06 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Fri, 17 Jan 2025 22:21:19 +0800 Subject: [PATCH 11/88] [X86][AMX-AVX512][NFC] Remove P from intrinsic and instruction name (#123270) Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 --- 
clang/include/clang/Basic/BuiltinsX86_64.td | 8 ++-- clang/lib/Headers/amxavx512intrin.h | 40 ++++++++--------- clang/lib/Sema/SemaX86.cpp | 4 +- clang/test/CodeGen/X86/amx_avx512_api.c | 16 +++---- clang/test/CodeGen/X86/amxavx512-builtins.c | 16 +++---- llvm/include/llvm/IR/IntrinsicsX86.td | 14 +++--- llvm/lib/Target/X86/X86ExpandPseudo.cpp | 24 +++++----- llvm/lib/Target/X86/X86ISelLowering.cpp | 24 +++++----- llvm/lib/Target/X86/X86InstrAMX.td | 44 +++++++++---------- llvm/lib/Target/X86/X86LowerAMXType.cpp | 4 +- llvm/lib/Target/X86/X86PreTileConfig.cpp | 8 ++-- .../test/CodeGen/X86/amx-avx512-intrinsics.ll | 36 +++++++-------- .../CodeGen/X86/amx-tile-avx512-internals.ll | 20 ++++----- llvm/test/MC/Disassembler/X86/amx-avx512.txt | 32 +++++++------- llvm/test/MC/X86/amx-avx512-att.s | 32 +++++++------- llvm/test/MC/X86/amx-avx512-intel.s | 32 +++++++------- 16 files changed, 177 insertions(+), 177 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86_64.td b/clang/include/clang/Basic/BuiltinsX86_64.td index a6c6ef80eac21..4958265298d1b 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.td +++ b/clang/include/clang/Basic/BuiltinsX86_64.td @@ -295,8 +295,8 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in { let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in { def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; - def tcvtrowps2pbf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; - def tcvtrowps2pbf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; + def tcvtrowps2bf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; + def tcvtrowps2bf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; 
def tcvtrowps2phh_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; def tcvtrowps2phl_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; @@ -387,8 +387,8 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in { let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in { def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, unsigned int)">; - def tcvtrowps2pbf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">; - def tcvtrowps2pbf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">; + def tcvtrowps2bf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">; + def tcvtrowps2bf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">; def tcvtrowps2phh : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">; def tcvtrowps2phl : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">; def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, unsigned int)">; diff --git a/clang/lib/Headers/amxavx512intrin.h b/clang/lib/Headers/amxavx512intrin.h index e4d77e503015a..a158983482d5b 100644 --- a/clang/lib/Headers/amxavx512intrin.h +++ b/clang/lib/Headers/amxavx512intrin.h @@ -60,7 +60,7 @@ /// \headerfile /// /// \code -/// __m512i _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row); +/// __m512i _tile_cvtrowps2bf16h(__tile tsrc, unsigned int row); /// \endcode /// /// \code{.operation} @@ -80,14 +80,14 @@ /// zero_tileconfig_start() /// \endcode /// -/// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction. +/// This intrinsic corresponds to the \c TCVTROWPS2BF16H instruction. /// /// \param tsrc /// The source tile. 
Max size is 1024 Bytes. /// \param row /// The the row of the source tile. -#define _tile_cvtrowps2pbf16h(tsrc, row) \ - __builtin_ia32_tcvtrowps2pbf16h(tsrc, row) +#define _tile_cvtrowps2bf16h(tsrc, row) \ + __builtin_ia32_tcvtrowps2bf16h(tsrc, row) /// Moves a row from a tile register to a zmm destination register, converting /// the fp32 source elements to bf16. It places the resulting bf16 elements @@ -97,7 +97,7 @@ /// \headerfile /// /// \code -/// __m512i _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row); +/// __m512i _tile_cvtrowps2bf16l(__tile tsrc, unsigned int row); /// \endcode /// /// \code{.operation} @@ -117,14 +117,14 @@ /// zero_tileconfig_start() /// \endcode /// -/// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction. +/// This intrinsic corresponds to the \c TCVTROWPS2BF16L instruction. /// /// \param tsrc /// The source tile. Max size is 1024 Bytes. /// \param row /// The the row of the source tile. -#define _tile_cvtrowps2pbf16l(tsrc, row) \ - __builtin_ia32_tcvtrowps2pbf16l(tsrc, row) +#define _tile_cvtrowps2bf16l(tsrc, row) \ + __builtin_ia32_tcvtrowps2bf16l(tsrc, row) /// Moves a row from a tile register to a zmm destination register, converting /// the fp32 source elements to fp16. 
It places the resulting fp16 elements @@ -238,15 +238,15 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal( } static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512 -_tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n, - _tile1024i src, unsigned u) { - return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u); +_tile_cvtrowps2bf16h_internal(unsigned short m, unsigned short n, + _tile1024i src, unsigned u) { + return __builtin_ia32_tcvtrowps2bf16h_internal(m, n, src, u); } static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512 -_tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n, - _tile1024i src, unsigned u) { - return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u); +_tile_cvtrowps2bf16l_internal(unsigned short m, unsigned short n, + _tile1024i src, unsigned u) { + return __builtin_ia32_tcvtrowps2bf16l_internal(m, n, src, u); } static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal( @@ -290,7 +290,7 @@ static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) { /// /// \headerfile /// -/// This intrinsic corresponds to the TCVTROWPS2PBF16H instruction. +/// This intrinsic corresponds to the TCVTROWPS2BF16H instruction. /// /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. @@ -299,8 +299,8 @@ static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) { /// \returns /// The destination v32bf16 data. Size is 64 Bytes. 
__DEFAULT_FN_ATTRS_AVX512 -static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) { - return _tile_cvtrowps2pbf16h_internal(src0.row, src0.col, src0.tile, src1); +static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) { + return _tile_cvtrowps2bf16h_internal(src0.row, src0.col, src0.tile, src1); } /// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source @@ -309,7 +309,7 @@ static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) { /// /// \headerfile /// -/// This intrinsic corresponds to the TCVTROWPS2PBF16L instruction. +/// This intrinsic corresponds to the TCVTROWPS2BF16L instruction. /// /// \param src0 /// The 1st source tile. Max size is 1024 Bytes. @@ -318,8 +318,8 @@ static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) { /// \returns /// The destination v32bf16 data. Size is 64 Bytes. __DEFAULT_FN_ATTRS_AVX512 -static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) { - return _tile_cvtrowps2pbf16l_internal(src0.row, src0.col, src0.tile, src1); +static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) { + return _tile_cvtrowps2bf16l_internal(src0.row, src0.col, src0.tile, src1); } /// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp index 0b4b78c5b15dc..fd1a6017712d2 100644 --- a/clang/lib/Sema/SemaX86.cpp +++ b/clang/lib/Sema/SemaX86.cpp @@ -641,8 +641,8 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_t2rpntlvwz1rs: case X86::BI__builtin_ia32_t2rpntlvwz1rst1: case X86::BI__builtin_ia32_t2rpntlvwz0rs: - case X86::BI__builtin_ia32_tcvtrowps2pbf16h: - case X86::BI__builtin_ia32_tcvtrowps2pbf16l: + case X86::BI__builtin_ia32_tcvtrowps2bf16h: + case X86::BI__builtin_ia32_tcvtrowps2bf16l: case X86::BI__builtin_ia32_tcvtrowps2phh: case X86::BI__builtin_ia32_tcvtrowps2phl: case 
X86::BI__builtin_ia32_tcvtrowd2ps: diff --git a/clang/test/CodeGen/X86/amx_avx512_api.c b/clang/test/CodeGen/X86/amx_avx512_api.c index aea790d61268d..fac41ea6c214f 100644 --- a/clang/test/CodeGen/X86/amx_avx512_api.c +++ b/clang/test/CodeGen/X86/amx_avx512_api.c @@ -16,18 +16,18 @@ __m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) { return __tile_cvtrowd2ps(a, b); } -__m512bh test_tile_cvtrowps2pbf16h(__tile1024i a, unsigned b) { - //CHECK-LABEL: @test_tile_cvtrowps2pbf16h +__m512bh test_tile_cvtrowps2bf16h(__tile1024i a, unsigned b) { + //CHECK-LABEL: @test_tile_cvtrowps2bf16h //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal - return __tile_cvtrowps2pbf16h(a, b); + //CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h.internal + return __tile_cvtrowps2bf16h(a, b); } -__m512bh test_tile_cvtrowps2pbf16l(__tile1024i a, unsigned b) { - //CHECK-LABEL: @test_tile_cvtrowps2pbf16l +__m512bh test_tile_cvtrowps2bf16l(__tile1024i a, unsigned b) { + //CHECK-LABEL: @test_tile_cvtrowps2bf16l //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal - return __tile_cvtrowps2pbf16l(a, b); + //CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l.internal + return __tile_cvtrowps2bf16l(a, b); } __m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) { diff --git a/clang/test/CodeGen/X86/amxavx512-builtins.c b/clang/test/CodeGen/X86/amxavx512-builtins.c index 172b5ae8f5308..0f203349b1d1e 100644 --- a/clang/test/CodeGen/X86/amxavx512-builtins.c +++ b/clang/test/CodeGen/X86/amxavx512-builtins.c @@ -10,16 +10,16 @@ __m512 test_tile_cvtrowd2ps(unsigned int A) { return _tile_cvtrowd2ps(1, A); } -__m512bh test_tile_cvtrowps2pbf16h(unsigned int A) { - // CHECK-LABEL: @test_tile_cvtrowps2pbf16h( - // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %{{.*}}) - 
return _tile_cvtrowps2pbf16h(1, A); +__m512bh test_tile_cvtrowps2bf16h(unsigned int A) { + // CHECK-LABEL: @test_tile_cvtrowps2bf16h( + // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 1, i32 %{{.*}}) + return _tile_cvtrowps2bf16h(1, A); } -__m512bh test_tile_cvtrowps2pbf16l(unsigned int A) { - // CHECK-LABEL: @test_tile_cvtrowps2pbf16l( - // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %{{.*}}) - return _tile_cvtrowps2pbf16l(1, A); +__m512bh test_tile_cvtrowps2bf16l(unsigned int A) { + // CHECK-LABEL: @test_tile_cvtrowps2bf16l( + // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 1, i32 %{{.*}}) + return _tile_cvtrowps2bf16l(1, A); } __m512h test_tile_cvtrowps2phh(unsigned int A) { diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 42b211e0e1f75..fb12949e10c7e 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -5999,10 +5999,10 @@ let TargetPrefix = "x86" in { def int_x86_tcvtrowd2ps : ClangBuiltin<"__builtin_ia32_tcvtrowd2ps">, Intrinsic<[llvm_v16f32_ty], [llvm_i8_ty, llvm_i32_ty], [ImmArg>]>; - def int_x86_tcvtrowps2pbf16h : ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16h">, + def int_x86_tcvtrowps2bf16h : ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16h">, Intrinsic<[llvm_v32bf16_ty], [llvm_i8_ty, llvm_i32_ty], [ImmArg>]>; - def int_x86_tcvtrowps2pbf16l : ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16l">, + def int_x86_tcvtrowps2bf16l : ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16l">, Intrinsic<[llvm_v32bf16_ty], [llvm_i8_ty, llvm_i32_ty], [ImmArg>]>; def int_x86_tcvtrowps2phh : ClangBuiltin<"__builtin_ia32_tcvtrowps2phh">, @@ -6181,13 +6181,13 @@ let TargetPrefix = "x86" in { Intrinsic<[llvm_v16f32_ty], [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty], []>; - def int_x86_tcvtrowps2pbf16h_internal : - ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16h_internal">, + def int_x86_tcvtrowps2bf16h_internal : + 
ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16h_internal">, Intrinsic<[llvm_v32bf16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty], []>; - def int_x86_tcvtrowps2pbf16l_internal : - ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16l_internal">, + def int_x86_tcvtrowps2bf16l_internal : + ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16l_internal">, Intrinsic<[llvm_v32bf16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty], []>; @@ -7893,4 +7893,4 @@ def int_x86_movrsdi : ClangBuiltin<"__builtin_ia32_movrsdi">, [IntrReadMem]>; def int_x86_prefetchrs : ClangBuiltin<"__builtin_ia32_prefetchrs">, Intrinsic<[], [llvm_ptr_ty], []>; -} \ No newline at end of file +} diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index 7fbba7f05e0a5..78db8413e62c9 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -563,10 +563,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, case X86::PTILELOADDRST1V: case X86::PTCVTROWD2PSrreV: case X86::PTCVTROWD2PSrriV: - case X86::PTCVTROWPS2PBF16HrreV: - case X86::PTCVTROWPS2PBF16HrriV: - case X86::PTCVTROWPS2PBF16LrreV: - case X86::PTCVTROWPS2PBF16LrriV: + case X86::PTCVTROWPS2BF16HrreV: + case X86::PTCVTROWPS2BF16HrriV: + case X86::PTCVTROWPS2BF16LrreV: + case X86::PTCVTROWPS2BF16LrriV: case X86::PTCVTROWPS2PHHrreV: case X86::PTCVTROWPS2PHHrriV: case X86::PTCVTROWPS2PHLrreV: @@ -595,17 +595,17 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, case X86::PTCVTROWD2PSrriV: Opc = X86::TCVTROWD2PSrri; break; - case X86::PTCVTROWPS2PBF16HrreV: - Opc = X86::TCVTROWPS2PBF16Hrre; + case X86::PTCVTROWPS2BF16HrreV: + Opc = X86::TCVTROWPS2BF16Hrre; break; - case X86::PTCVTROWPS2PBF16HrriV: - Opc = X86::TCVTROWPS2PBF16Hrri; + case X86::PTCVTROWPS2BF16HrriV: + Opc = X86::TCVTROWPS2BF16Hrri; break; - case X86::PTCVTROWPS2PBF16LrreV: - Opc = X86::TCVTROWPS2PBF16Lrre; + case X86::PTCVTROWPS2BF16LrreV: + Opc = X86::TCVTROWPS2BF16Lrre; break; 
- case X86::PTCVTROWPS2PBF16LrriV: - Opc = X86::TCVTROWPS2PBF16Lrri; + case X86::PTCVTROWPS2BF16LrriV: + Opc = X86::TCVTROWPS2BF16Lrri; break; case X86::PTCVTROWPS2PHHrreV: Opc = X86::TCVTROWPS2PHHrre; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index dba38f3e1a0bc..5bebee765833b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37892,8 +37892,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.eraseFromParent(); // The pseudo is gone now. return BB; } - case X86::PTCVTROWPS2PBF16Hrri: - case X86::PTCVTROWPS2PBF16Lrri: + case X86::PTCVTROWPS2BF16Hrri: + case X86::PTCVTROWPS2BF16Lrri: case X86::PTCVTROWPS2PHHrri: case X86::PTCVTROWPS2PHLrri: case X86::PTCVTROWD2PSrri: @@ -37906,14 +37906,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::PTCVTROWD2PSrri: Opc = X86::TCVTROWD2PSrri; break; - case X86::PTCVTROWPS2PBF16Hrri: - Opc = X86::TCVTROWPS2PBF16Hrri; + case X86::PTCVTROWPS2BF16Hrri: + Opc = X86::TCVTROWPS2BF16Hrri; break; case X86::PTCVTROWPS2PHHrri: Opc = X86::TCVTROWPS2PHHrri; break; - case X86::PTCVTROWPS2PBF16Lrri: - Opc = X86::TCVTROWPS2PBF16Lrri; + case X86::PTCVTROWPS2BF16Lrri: + Opc = X86::TCVTROWPS2BF16Lrri; break; case X86::PTCVTROWPS2PHLrri: Opc = X86::TCVTROWPS2PHLrri; @@ -37930,8 +37930,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.eraseFromParent(); // The pseudo is gone now. 
return BB; } - case X86::PTCVTROWPS2PBF16Hrre: - case X86::PTCVTROWPS2PBF16Lrre: + case X86::PTCVTROWPS2BF16Hrre: + case X86::PTCVTROWPS2BF16Lrre: case X86::PTCVTROWPS2PHHrre: case X86::PTCVTROWPS2PHLrre: case X86::PTCVTROWD2PSrre: @@ -37944,11 +37944,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::PTCVTROWD2PSrre: Opc = X86::TCVTROWD2PSrre; break; - case X86::PTCVTROWPS2PBF16Hrre: - Opc = X86::TCVTROWPS2PBF16Hrre; + case X86::PTCVTROWPS2BF16Hrre: + Opc = X86::TCVTROWPS2BF16Hrre; break; - case X86::PTCVTROWPS2PBF16Lrre: - Opc = X86::TCVTROWPS2PBF16Lrre; + case X86::PTCVTROWPS2BF16Lrre: + Opc = X86::TCVTROWPS2BF16Lrre; break; case X86::PTCVTROWPS2PHHrre: Opc = X86::TCVTROWPS2PHHrre; diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td index 85046228bc8c5..1beaaafb159e3 100644 --- a/llvm/lib/Target/X86/X86InstrAMX.td +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -585,26 +585,26 @@ let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { [(set VR512: $dst, (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4))]>; - def PTCVTROWPS2PBF16HrriV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2pbf16h_internal GR16:$src1, GR16:$src2, - TILE:$src3, imm:$src4))]>; - def PTCVTROWPS2PBF16HrreV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2pbf16h_internal GR16:$src1, GR16:$src2, - TILE:$src3, GR32:$src4))]>; - def PTCVTROWPS2PBF16LrriV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2pbf16l_internal GR16:$src1, GR16:$src2, - TILE:$src3, imm:$src4))]>; - def PTCVTROWPS2PBF16LrreV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2pbf16l_internal GR16:$src1, GR16:$src2, - 
TILE:$src3, GR32:$src4))]>; + def PTCVTROWPS2BF16HrriV : PseudoI<(outs VR512:$dst), + (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), + [(set VR512: $dst, + (int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2, + TILE:$src3, imm:$src4))]>; + def PTCVTROWPS2BF16HrreV : PseudoI<(outs VR512:$dst), + (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4), + [(set VR512: $dst, + (int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2, + TILE:$src3, GR32:$src4))]>; + def PTCVTROWPS2BF16LrriV : PseudoI<(outs VR512:$dst), + (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), + [(set VR512: $dst, + (int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2, + TILE:$src3, imm:$src4))]>; + def PTCVTROWPS2BF16LrreV : PseudoI<(outs VR512:$dst), + (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4), + [(set VR512: $dst, + (int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2, + TILE:$src3, GR32:$src4))]>; def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst), (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), [(set VR512: $dst, @@ -654,8 +654,8 @@ multiclass AMXAVX512_BASE Opcode1, bits<8> Opcode2, string Opstr, defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>; defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>; -defm TCVTROWPS2PBF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2pbf16h", XD, XD>; -defm TCVTROWPS2PBF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2pbf16l", XS, XS>; +defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>; +defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>; multiclass m_tilemovrow { let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp index fe963dddaac1f..cd5813a5338ea 100644 --- a/llvm/lib/Target/X86/X86LowerAMXType.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp @@ -273,8 +273,8 @@ std::pair ShapeCalculator::getShape(IntrinsicInst 
*II, break; } case Intrinsic::x86_tcvtrowd2ps_internal: - case Intrinsic::x86_tcvtrowps2pbf16h_internal: - case Intrinsic::x86_tcvtrowps2pbf16l_internal: + case Intrinsic::x86_tcvtrowps2bf16h_internal: + case Intrinsic::x86_tcvtrowps2bf16l_internal: case Intrinsic::x86_tcvtrowps2phh_internal: case Intrinsic::x86_tcvtrowps2phl_internal: case Intrinsic::x86_tilemovrow_internal: { diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp index d232a1d706549..96801636deb9e 100644 --- a/llvm/lib/Target/X86/X86PreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp @@ -122,10 +122,10 @@ class X86PreTileConfig : public MachineFunctionPass { case X86::PTILESTOREDV: case X86::PTCVTROWD2PSrreV: case X86::PTCVTROWD2PSrriV: - case X86::PTCVTROWPS2PBF16HrreV: - case X86::PTCVTROWPS2PBF16HrriV: - case X86::PTCVTROWPS2PBF16LrreV: - case X86::PTCVTROWPS2PBF16LrriV: + case X86::PTCVTROWPS2BF16HrreV: + case X86::PTCVTROWPS2BF16HrriV: + case X86::PTCVTROWPS2BF16LrreV: + case X86::PTCVTROWPS2BF16LrriV: case X86::PTCVTROWPS2PHHrreV: case X86::PTCVTROWPS2PHHrriV: case X86::PTCVTROWPS2PHLrreV: diff --git a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll index da7fedee88821..8f82bd2587ec3 100644 --- a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll @@ -20,43 +20,43 @@ define <16 x float> @test_tcvtrowd2psi() { } declare <16 x float> @llvm.x86.tcvtrowd2ps(i8 %A, i32 %B) -define <32 x bfloat> @test_tcvtrowps2pbf16h(i32 %A) { -; CHECK-LABEL: test_tcvtrowps2pbf16h: +define <32 x bfloat> @test_tcvtrowps2bf16h(i32 %A) { +; CHECK-LABEL: test_tcvtrowps2bf16h: ; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2pbf16h %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x47,0x48,0x6d,0xc1] +; CHECK-NEXT: tcvtrowps2bf16h %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x47,0x48,0x6d,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x bfloat> 
@llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %A) + %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 1, i32 %A) ret <32 x bfloat> %ret } -define <32 x bfloat> @test_tcvtrowps2pbf16hi() { -; CHECK-LABEL: test_tcvtrowps2pbf16hi: +define <32 x bfloat> @test_tcvtrowps2bf16hi() { +; CHECK-LABEL: test_tcvtrowps2bf16hi: ; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2pbf16h $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x07,0xc1,0x7f] +; CHECK-NEXT: tcvtrowps2bf16h $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x07,0xc1,0x7f] ; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 127) + %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 1, i32 127) ret <32 x bfloat> %ret } -declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 %A, i32 %B) +declare <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 %A, i32 %B) -define <32 x bfloat> @test_tcvtrowps2pbf16l(i32 %A) { -; CHECK-LABEL: test_tcvtrowps2pbf16l: +define <32 x bfloat> @test_tcvtrowps2bf16l(i32 %A) { +; CHECK-LABEL: test_tcvtrowps2bf16l: ; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2pbf16l %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x46,0x48,0x6d,0xc1] +; CHECK-NEXT: tcvtrowps2bf16l %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x46,0x48,0x6d,0xc1] ; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %A) + %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 1, i32 %A) ret <32 x bfloat> %ret } -define <32 x bfloat> @test_tcvtrowps2pbf16li() { -; CHECK-LABEL: test_tcvtrowps2pbf16li: +define <32 x bfloat> @test_tcvtrowps2bf16li() { +; CHECK-LABEL: test_tcvtrowps2bf16li: ; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2pbf16l $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7e,0x48,0x77,0xc1,0x7f] +; CHECK-NEXT: tcvtrowps2bf16l $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7e,0x48,0x77,0xc1,0x7f] ; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 127) + %ret = call <32 x bfloat> 
@llvm.x86.tcvtrowps2bf16l(i8 1, i32 127) ret <32 x bfloat> %ret } -declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 %A, i32 %B) +declare <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 %A, i32 %B) define <32 x half> @test_tcvtrowps2phh(i32 %A) { ; CHECK-LABEL: test_tcvtrowps2phh: diff --git a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll b/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll index b4a5c90bbea33..fd3925fabc513 100644 --- a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll +++ b/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll @@ -15,10 +15,10 @@ define void @test_amx(i8* %pointer, i8* %base, i32 %index, i64 %stride) { ; CHECK-NEXT: tileloadd (%rsi,%rcx), %tmm0 ; CHECK-NEXT: tcvtrowd2ps %edx, %tmm0, %zmm0 ; CHECK-NEXT: tcvtrowd2ps $16, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2pbf16h %edx, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2pbf16h $16, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2pbf16l %edx, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2pbf16l $16, %tmm0, %zmm0 +; CHECK-NEXT: tcvtrowps2bf16h %edx, %tmm0, %zmm0 +; CHECK-NEXT: tcvtrowps2bf16h $16, %tmm0, %zmm0 +; CHECK-NEXT: tcvtrowps2bf16l %edx, %tmm0, %zmm0 +; CHECK-NEXT: tcvtrowps2bf16l $16, %tmm0, %zmm0 ; CHECK-NEXT: tcvtrowps2phh %edx, %tmm0, %zmm0 ; CHECK-NEXT: tcvtrowps2phh $16, %tmm0, %zmm0 ; CHECK-NEXT: tcvtrowps2phl %edx, %tmm0, %zmm0 @@ -33,10 +33,10 @@ define void @test_amx(i8* %pointer, i8* %base, i32 %index, i64 %stride) { %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride) call <16 x float> @llvm.x86.tcvtrowd2ps.internal(i16 8, i16 8, x86_amx %a, i32 %index) call <16 x float> @llvm.x86.tcvtrowd2ps.internal(i16 8, i16 8, x86_amx %a, i32 16) - call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal(i16 8, i16 8, x86_amx %a, i32 %index) - call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal(i16 8, i16 8, x86_amx %a, i32 16) - call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal(i16 8, i16 8, x86_amx %a, i32 %index) - call <32 x bfloat> 
@llvm.x86.tcvtrowps2pbf16l.internal(i16 8, i16 8, x86_amx %a, i32 16) + call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h.internal(i16 8, i16 8, x86_amx %a, i32 %index) + call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h.internal(i16 8, i16 8, x86_amx %a, i32 16) + call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l.internal(i16 8, i16 8, x86_amx %a, i32 %index) + call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l.internal(i16 8, i16 8, x86_amx %a, i32 16) call <32 x half> @llvm.x86.tcvtrowps2phh.internal(i16 8, i16 8, x86_amx %a, i32 %index) call <32 x half> @llvm.x86.tcvtrowps2phh.internal(i16 8, i16 8, x86_amx %a, i32 16) call <32 x half> @llvm.x86.tcvtrowps2phl.internal(i16 8, i16 8, x86_amx %a, i32 %index) @@ -54,8 +54,8 @@ declare x86_amx @llvm.x86.tileloaddt164.internal(i16, i16, i8*, i64) declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx) declare <16 x float> @llvm.x86.tcvtrowd2ps.internal(i16, i16, x86_amx, i32) -declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal(i16, i16, x86_amx, i32) -declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal(i16, i16, x86_amx, i32) +declare <32 x bfloat> @llvm.x86.tcvtrowps2bf16h.internal(i16, i16, x86_amx, i32) +declare <32 x bfloat> @llvm.x86.tcvtrowps2bf16l.internal(i16, i16, x86_amx, i32) declare <32 x half> @llvm.x86.tcvtrowps2phh.internal(i16, i16, x86_amx, i32) declare <32 x half> @llvm.x86.tcvtrowps2phl.internal(i16, i16, x86_amx, i32) declare <16 x i32> @llvm.x86.tilemovrow.internal(i16, i16, x86_amx, i32) diff --git a/llvm/test/MC/Disassembler/X86/amx-avx512.txt b/llvm/test/MC/Disassembler/X86/amx-avx512.txt index 0a162af1b4bc0..17858f333e632 100644 --- a/llvm/test/MC/Disassembler/X86/amx-avx512.txt +++ b/llvm/test/MC/Disassembler/X86/amx-avx512.txt @@ -17,36 +17,36 @@ # INTEL: tcvtrowd2ps zmm22, tmm2, 123 0x62,0xe3,0x7e,0x48,0x07,0xf2,0x7b -# ATT: tcvtrowps2pbf16h %ecx, %tmm5, %zmm22 -# INTEL: tcvtrowps2pbf16h zmm22, tmm5, ecx +# ATT: tcvtrowps2bf16h %ecx, %tmm5, %zmm22 +# INTEL: tcvtrowps2bf16h zmm22, 
tmm5, ecx 0x62,0xe2,0x77,0x48,0x6d,0xf5 -# ATT: tcvtrowps2pbf16h %ecx, %tmm2, %zmm22 -# INTEL: tcvtrowps2pbf16h zmm22, tmm2, ecx +# ATT: tcvtrowps2bf16h %ecx, %tmm2, %zmm22 +# INTEL: tcvtrowps2bf16h zmm22, tmm2, ecx 0x62,0xe2,0x77,0x48,0x6d,0xf2 -# ATT: tcvtrowps2pbf16h $123, %tmm5, %zmm22 -# INTEL: tcvtrowps2pbf16h zmm22, tmm5, 123 +# ATT: tcvtrowps2bf16h $123, %tmm5, %zmm22 +# INTEL: tcvtrowps2bf16h zmm22, tmm5, 123 0x62,0xe3,0x7f,0x48,0x07,0xf5,0x7b -# ATT: tcvtrowps2pbf16h $123, %tmm2, %zmm22 -# INTEL: tcvtrowps2pbf16h zmm22, tmm2, 123 +# ATT: tcvtrowps2bf16h $123, %tmm2, %zmm22 +# INTEL: tcvtrowps2bf16h zmm22, tmm2, 123 0x62,0xe3,0x7f,0x48,0x07,0xf2,0x7b -# ATT: tcvtrowps2pbf16l %ecx, %tmm5, %zmm22 -# INTEL: tcvtrowps2pbf16l zmm22, tmm5, ecx +# ATT: tcvtrowps2bf16l %ecx, %tmm5, %zmm22 +# INTEL: tcvtrowps2bf16l zmm22, tmm5, ecx 0x62,0xe2,0x76,0x48,0x6d,0xf5 -# ATT: tcvtrowps2pbf16l %ecx, %tmm2, %zmm22 -# INTEL: tcvtrowps2pbf16l zmm22, tmm2, ecx +# ATT: tcvtrowps2bf16l %ecx, %tmm2, %zmm22 +# INTEL: tcvtrowps2bf16l zmm22, tmm2, ecx 0x62,0xe2,0x76,0x48,0x6d,0xf2 -# ATT: tcvtrowps2pbf16l $123, %tmm5, %zmm22 -# INTEL: tcvtrowps2pbf16l zmm22, tmm5, 123 +# ATT: tcvtrowps2bf16l $123, %tmm5, %zmm22 +# INTEL: tcvtrowps2bf16l zmm22, tmm5, 123 0x62,0xe3,0x7e,0x48,0x77,0xf5,0x7b -# ATT: tcvtrowps2pbf16l $123, %tmm2, %zmm22 -# INTEL: tcvtrowps2pbf16l zmm22, tmm2, 123 +# ATT: tcvtrowps2bf16l $123, %tmm2, %zmm22 +# INTEL: tcvtrowps2bf16l zmm22, tmm2, 123 0x62,0xe3,0x7e,0x48,0x77,0xf2,0x7b # ATT: tcvtrowps2phh %ecx, %tmm5, %zmm22 diff --git a/llvm/test/MC/X86/amx-avx512-att.s b/llvm/test/MC/X86/amx-avx512-att.s index 6da4ede82c621..ddab9225199a9 100644 --- a/llvm/test/MC/X86/amx-avx512-att.s +++ b/llvm/test/MC/X86/amx-avx512-att.s @@ -16,37 +16,37 @@ // CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x07,0xf2,0x7b] tcvtrowd2ps $123, %tmm2, %zmm22 -// CHECK: tcvtrowps2pbf16h %ecx, %tmm5, %zmm22 +// CHECK: tcvtrowps2bf16h %ecx, %tmm5, %zmm22 // CHECK: encoding: 
[0x62,0xe2,0x77,0x48,0x6d,0xf5] - tcvtrowps2pbf16h %ecx, %tmm5, %zmm22 + tcvtrowps2bf16h %ecx, %tmm5, %zmm22 -// CHECK: tcvtrowps2pbf16h %ecx, %tmm2, %zmm22 +// CHECK: tcvtrowps2bf16h %ecx, %tmm2, %zmm22 // CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf2] - tcvtrowps2pbf16h %ecx, %tmm2, %zmm22 + tcvtrowps2bf16h %ecx, %tmm2, %zmm22 -// CHECK: tcvtrowps2pbf16h $123, %tmm5, %zmm22 +// CHECK: tcvtrowps2bf16h $123, %tmm5, %zmm22 // CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf5,0x7b] - tcvtrowps2pbf16h $123, %tmm5, %zmm22 + tcvtrowps2bf16h $123, %tmm5, %zmm22 -// CHECK: tcvtrowps2pbf16h $123, %tmm2, %zmm22 +// CHECK: tcvtrowps2bf16h $123, %tmm2, %zmm22 // CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf2,0x7b] - tcvtrowps2pbf16h $123, %tmm2, %zmm22 + tcvtrowps2bf16h $123, %tmm2, %zmm22 -// CHECK: tcvtrowps2pbf16l %ecx, %tmm5, %zmm22 +// CHECK: tcvtrowps2bf16l %ecx, %tmm5, %zmm22 // CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf5] - tcvtrowps2pbf16l %ecx, %tmm5, %zmm22 + tcvtrowps2bf16l %ecx, %tmm5, %zmm22 -// CHECK: tcvtrowps2pbf16l %ecx, %tmm2, %zmm22 +// CHECK: tcvtrowps2bf16l %ecx, %tmm2, %zmm22 // CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf2] - tcvtrowps2pbf16l %ecx, %tmm2, %zmm22 + tcvtrowps2bf16l %ecx, %tmm2, %zmm22 -// CHECK: tcvtrowps2pbf16l $123, %tmm5, %zmm22 +// CHECK: tcvtrowps2bf16l $123, %tmm5, %zmm22 // CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf5,0x7b] - tcvtrowps2pbf16l $123, %tmm5, %zmm22 + tcvtrowps2bf16l $123, %tmm5, %zmm22 -// CHECK: tcvtrowps2pbf16l $123, %tmm2, %zmm22 +// CHECK: tcvtrowps2bf16l $123, %tmm2, %zmm22 // CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf2,0x7b] - tcvtrowps2pbf16l $123, %tmm2, %zmm22 + tcvtrowps2bf16l $123, %tmm2, %zmm22 // CHECK: tcvtrowps2phh %ecx, %tmm5, %zmm22 // CHECK: encoding: [0x62,0xe2,0x74,0x48,0x6d,0xf5] diff --git a/llvm/test/MC/X86/amx-avx512-intel.s b/llvm/test/MC/X86/amx-avx512-intel.s index 3a517a6cd1aab..918e9126d35f9 100644 --- a/llvm/test/MC/X86/amx-avx512-intel.s +++ 
b/llvm/test/MC/X86/amx-avx512-intel.s @@ -16,37 +16,37 @@ // CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x07,0xf2,0x7b] tcvtrowd2ps zmm22, tmm2, 123 -// CHECK: tcvtrowps2pbf16h zmm22, tmm5, ecx +// CHECK: tcvtrowps2bf16h zmm22, tmm5, ecx // CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf5] - tcvtrowps2pbf16h zmm22, tmm5, ecx + tcvtrowps2bf16h zmm22, tmm5, ecx -// CHECK: tcvtrowps2pbf16h zmm22, tmm2, ecx +// CHECK: tcvtrowps2bf16h zmm22, tmm2, ecx // CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf2] - tcvtrowps2pbf16h zmm22, tmm2, ecx + tcvtrowps2bf16h zmm22, tmm2, ecx -// CHECK: tcvtrowps2pbf16h zmm22, tmm5, 123 +// CHECK: tcvtrowps2bf16h zmm22, tmm5, 123 // CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf5,0x7b] - tcvtrowps2pbf16h zmm22, tmm5, 123 + tcvtrowps2bf16h zmm22, tmm5, 123 -// CHECK: tcvtrowps2pbf16h zmm22, tmm2, 123 +// CHECK: tcvtrowps2bf16h zmm22, tmm2, 123 // CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf2,0x7b] - tcvtrowps2pbf16h zmm22, tmm2, 123 + tcvtrowps2bf16h zmm22, tmm2, 123 -// CHECK: tcvtrowps2pbf16l zmm22, tmm5, ecx +// CHECK: tcvtrowps2bf16l zmm22, tmm5, ecx // CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf5] - tcvtrowps2pbf16l zmm22, tmm5, ecx + tcvtrowps2bf16l zmm22, tmm5, ecx -// CHECK: tcvtrowps2pbf16l zmm22, tmm2, ecx +// CHECK: tcvtrowps2bf16l zmm22, tmm2, ecx // CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf2] - tcvtrowps2pbf16l zmm22, tmm2, ecx + tcvtrowps2bf16l zmm22, tmm2, ecx -// CHECK: tcvtrowps2pbf16l zmm22, tmm5, 123 +// CHECK: tcvtrowps2bf16l zmm22, tmm5, 123 // CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf5,0x7b] - tcvtrowps2pbf16l zmm22, tmm5, 123 + tcvtrowps2bf16l zmm22, tmm5, 123 -// CHECK: tcvtrowps2pbf16l zmm22, tmm2, 123 +// CHECK: tcvtrowps2bf16l zmm22, tmm2, 123 // CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf2,0x7b] - tcvtrowps2pbf16l zmm22, tmm2, 123 + tcvtrowps2bf16l zmm22, tmm2, 123 // CHECK: tcvtrowps2phh zmm22, tmm5, ecx // CHECK: encoding: [0x62,0xe2,0x74,0x48,0x6d,0xf5] From 8688a31729cfd2f069fc53a5081273d61803a32f 
Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 17 Jan 2025 09:28:33 -0500 Subject: [PATCH 12/88] [libc++] Allow passing arguments to GoogleBenchmark's compare.py tool --- libcxx/utils/libcxx-compare-benchmarks | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/libcxx/utils/libcxx-compare-benchmarks b/libcxx/utils/libcxx-compare-benchmarks index e04820fc57ed9..08c53b2420c8e 100755 --- a/libcxx/utils/libcxx-compare-benchmarks +++ b/libcxx/utils/libcxx-compare-benchmarks @@ -7,15 +7,16 @@ MONOREPO_ROOT="$(realpath $(dirname "${PROGNAME}"))" function usage() { cat < benchmarks... +${PROGNAME} [-h|--help] benchmarks... [-- gbench-args...] Compare the given benchmarks between the baseline and the candidate build directories. This requires those benchmarks to have already been generated in both build directories. - The path to the build directory considered the baseline. - The path to the build directory considered the candidate. -benchmarks... Paths of the benchmarks to compare. Those paths are relative to ''. + The path to the build directory considered the baseline. + The path to the build directory considered the candidate. +benchmarks... Paths of the benchmarks to compare. Those paths are relative to ''. +[-- gbench-args...] Any arguments provided after '--' will be passed as-is to GoogleBenchmark's compare.py tool. 
Example ======= @@ -45,7 +46,17 @@ python3 -m venv /tmp/libcxx-compare-benchmarks-venv source /tmp/libcxx-compare-benchmarks-venv/bin/activate pip3 install -r ${GBENCH}/tools/requirements.txt -for benchmark in ${@}; do +benchmarks="" +while [[ $# -gt 0 ]]; do + if [[ "${1}" == "--" ]]; then + shift + break + fi + benchmarks+=" ${1}" + shift +done + +for benchmark in ${benchmarks}; do base="$(${MONOREPO_ROOT}/libcxx/utils/libcxx-benchmark-json ${baseline} ${benchmark})" cand="$(${MONOREPO_ROOT}/libcxx/utils/libcxx-benchmark-json ${candidate} ${benchmark})" @@ -58,5 +69,5 @@ for benchmark in ${@}; do continue fi - "${GBENCH}/tools/compare.py" benchmarks "${base}" "${cand}" + "${GBENCH}/tools/compare.py" benchmarks "${base}" "${cand}" ${@} done From 703e9e97d937f3bb25d4318d86e357a665e72731 Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Fri, 17 Jan 2025 09:36:55 -0500 Subject: [PATCH 13/88] [AMDGPU][True16][CodeGen] true16 codegen for bswap (#122849) true16 codegen pattern for bswap --- llvm/lib/Target/AMDGPU/SIInstructions.td | 15 ++++++++++++++ llvm/test/CodeGen/AMDGPU/bswap.ll | 25 ++++++++++++++++-------- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 1abbf4c217a69..40a20fa9cb15e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3041,6 +3041,8 @@ def : GCNPat < // Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24) // The 12s emit 0s. 
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in +let True16Predicate = p in { def : GCNPat < (i16 (bswap i16:$a)), (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001))) @@ -3050,6 +3052,19 @@ def : GCNPat < (i32 (zext (bswap i16:$a))), (V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001))) >; +} + +let True16Predicate = UseRealTrue16Insts in { +def : GCNPat < + (i16 (bswap i16:$a)), + (EXTRACT_SUBREG (V_PERM_B32_e64 (i32 0), (COPY VGPR_16:$a), (S_MOV_B32 (i32 0x0c0c0001))), lo16) +>; + +def : GCNPat < + (i32 (zext (bswap i16:$a))), + (V_PERM_B32_e64 (i32 0), (COPY VGPR_16:$a), (S_MOV_B32 (i32 0x0c0c0001))) +>; +} // Magic number: 1 | (0 << 8) | (3 << 16) | (2 << 24) def : GCNPat < diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll index 30c8e94c9a27f..a95a1aba0c914 100644 --- a/llvm/test/CodeGen/AMDGPU/bswap.ll +++ b/llvm/test/CodeGen/AMDGPU/bswap.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=amdgcn-- -verify-machineinstrs | FileCheck %s --check-prefix=SI ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI -; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=GFX11 +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-REAL16 +; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16 declare i16 @llvm.bswap.i16(i16) nounwind readnone declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) nounwind readnone @@ -490,13 +491,21 @@ define float @missing_truncate_promote_bswap(i32 %arg) { ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: 
missing_truncate_promote_bswap: -; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-REAL16-LABEL: missing_truncate_promote_bswap: +; GFX11-REAL16: ; %bb.0: ; %bb +; GFX11-REAL16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-REAL16-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001 +; GFX11-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l +; GFX11-REAL16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: missing_truncate_promote_bswap: +; GFX11-FAKE16: ; %bb.0: ; %bb +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, 0, v0, 0xc0c0001 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] bb: %tmp = trunc i32 %arg to i16 %tmp1 = call i16 @llvm.bswap.i16(i16 %tmp) From a18f4bdb18d59858e384540a62c9145c888cc9b2 Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Fri, 17 Jan 2025 09:38:52 -0500 Subject: [PATCH 14/88] [AMDGPU][True16][MC] true16 for v_cmpx_lt_f16 (#122936) True16 format for v_cmpx_lt_f16. Update VOPCX t16 and fake16 pseudo. 
--- llvm/lib/Target/AMDGPU/VOPCInstructions.td | 133 ++++++++++++++---- .../AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s | 65 +++++---- .../AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s | 25 ++-- .../MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s | 14 +- llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s | 75 ++++++---- llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s | 65 +++++---- llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s | 21 ++- llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s | 44 ++++-- .../MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s | 44 ++++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s | 14 +- llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s | 73 +++++----- llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s | 33 +++-- llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s | 72 ++++++---- llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s | 62 ++++---- llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s | 18 ++- llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s | 44 ++++-- .../MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s | 44 ++++-- .../gfx11_dasm_vop3_dpp16_from_vopcx.txt | 62 +++++--- .../gfx11_dasm_vop3_dpp8_from_vopcx.txt | 32 +++-- .../AMDGPU/gfx11_dasm_vop3_from_vopcx.txt | 22 ++- .../Disassembler/AMDGPU/gfx11_dasm_vopcx.txt | 73 +++++++--- .../AMDGPU/gfx11_dasm_vopcx_dpp16.txt | 62 +++++--- .../AMDGPU/gfx11_dasm_vopcx_dpp8.txt | 26 +++- .../AMDGPU/gfx12_dasm_vop3_from_vop1.txt | 2 - .../gfx12_dasm_vop3_from_vop1_dpp16.txt | 2 - .../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt | 2 - .../Disassembler/AMDGPU/gfx12_dasm_vop3cx.txt | 22 ++- .../AMDGPU/gfx12_dasm_vop3cx_dpp16.txt | 112 ++++++++++++--- .../AMDGPU/gfx12_dasm_vop3cx_dpp8.txt | 35 +++-- .../Disassembler/AMDGPU/gfx12_dasm_vopcx.txt | 69 ++++++--- .../AMDGPU/gfx12_dasm_vopcx_dpp16.txt | 58 +++++--- .../AMDGPU/gfx12_dasm_vopcx_dpp8.txt | 22 ++- 32 files changed, 993 insertions(+), 454 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 14e34c9e00ec6..bba8aa570d2b5 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ 
b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -153,8 +153,7 @@ class VOPC_NoSdst_Profile sched, ValueType vt0, let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, src0_sel:$src0_sel, src1_sel:$src1_sel); - let AsmVOP3Base = !if(Src0VT.isFP, "$src0_modifiers, $src1_modifiers$clamp", - "$src0, $src1"); + let HasDst = 0; let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let EmitDst = 0; } @@ -164,23 +163,53 @@ multiclass VOPC_NoSdst_Profile_t16 sched, ValueType vt0, Va def _t16 : VOPC_NoSdst_Profile { let IsTrue16 = 1; let IsRealTrue16 = 1; - let Src1RC32 = getVregSrcForVT.ret; - let Src0DPP = getVregSrcForVT.ret; - let Src1DPP = getVregSrcForVT.ret; - let Src2DPP = getVregSrcForVT.ret; - let Src0ModDPP = getSrcModDPP_t16.ret; - let Src1ModDPP = getSrcModDPP_t16.ret; - let Src2ModDPP = getSrcModDPP_t16.ret; + let HasOpSel = 1; + let HasModifiers = 1; // All instructions at least have OpSel + let Src0RC32 = getVOPSrc0ForVT.ret; + let Src1RC32 = getVregSrcForVT.ret; + let Src0DPP = getVregSrcForVT.ret; + let Src1DPP = getVregSrcForVT.ret; + let Src2DPP = getVregSrcForVT.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; + let Src0VOP3DPP = VGPRSrc_16; + let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; + let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; + + let Src0RC64 = getVOP3SrcForVT.ret; + let Src1RC64 = getVOP3SrcForVT.ret; + let Src2RC64 = getVOP3SrcForVT.ret; + let Src0Mod = getSrc0Mod.ret; + let Src1Mod = getSrcMod.ret; + let Src2Mod = getSrcMod.ret; + let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; } def _fake16 : VOPC_NoSdst_Profile { let IsTrue16 = 1; + let Src0RC32 = getVOPSrc0ForVT.ret; let Src1RC32 = getVregSrcForVT.ret; let Src0DPP = getVregSrcForVT.ret; let Src1DPP = getVregSrcForVT.ret; let Src2DPP = getVregSrcForVT.ret; - let 
Src0ModDPP = getSrcModDPP_t16.ret; - let Src1ModDPP = getSrcModDPP_t16.ret; - let Src2ModDPP = getSrcModDPP_t16.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; + let Src0VOP3DPP = VGPRSrc_32; + let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; + let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; + + let Src0RC64 = getVOP3SrcForVT.ret; + let Src1RC64 = getVOP3SrcForVT.ret; + let Src2RC64 = getVOP3SrcForVT.ret; + let Src0Mod = getSrc0Mod.ret; + let Src1Mod = getSrcMod.ret; + let Src2Mod = getSrcMod.ret; + let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; } } @@ -1408,7 +1437,7 @@ class VOPC64_DPP16_Dst op, VOP_DPP_Pseudo ps, class VOPC64_DPP16_NoDst op, VOP_DPP_Pseudo ps, string opName = ps.OpName> : VOPC64_DPP, VOP3_DPP_Enc { - let Inst{7-0} = ? ; + let Inst{7-0} = ?; } class VOPC64_DPP16_Dst_t16 op, VOP_DPP_Pseudo ps, @@ -1419,6 +1448,13 @@ class VOPC64_DPP16_Dst_t16 op, VOP_DPP_Pseudo ps, let Inst{14} = 0; } +class VOPC64_DPP16_NoDst_t16 op, VOP_DPP_Pseudo ps, + string opName = ps.OpName> + : VOPC64_DPP, VOP3_DPP_Enc_t16 { + let Inst{7-0} = ?; + let Inst{14} = 0; +} + class VOPC64_DPP8 : VOP3_DPP8_Base { Instruction Opcode = !cast(NAME); @@ -1440,7 +1476,7 @@ class VOPC64_DPP8_Dst op, VOP_Pseudo ps, string opName = ps.OpName> class VOPC64_DPP8_NoDst op, VOP_Pseudo ps, string opName = ps.OpName> : VOPC64_DPP8, VOP3_DPP8_Enc { - let Inst{7-0} = ? ; + let Inst{7-0} = ?; let Constraints = ""; } @@ -1452,6 +1488,13 @@ class VOPC64_DPP8_Dst_t16 op, VOP_Pseudo ps, string opName = ps.OpName> let Constraints = ""; } +class VOPC64_DPP8_NoDst_t16 op, VOP_Pseudo ps, string opName = ps.OpName> + : VOPC64_DPP8, VOP3_DPP8_Enc_t16 { + let Inst{7-0} = ?; + let Inst{14} = 0; + let Constraints = ""; +} + //===----------------------------------------------------------------------===// // Target-specific instruction encodings. 
//===----------------------------------------------------------------------===// @@ -1619,7 +1662,7 @@ multiclass VOPCX_Real op> { # " " # ps32.AsmOperands; } def _e64#Gen.Suffix : - VOP3_Real, + VOP3_Real_Gen, VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> { let Inst{7-0} = ?; // sdst let AsmString = !subst("_nosdst", "", ps64.Mnemonic) @@ -1677,11 +1720,22 @@ multiclass VOPCX_Real_with_name op, string OpName, VOPCe { let AsmString = asm_name # "{_e32} " # ps32.AsmOperands; } - def _e64#Gen.Suffix - : VOP3_Real_Gen, - VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> { - let Inst{7-0} = ? ; // sdst - let AsmString = asm_name # "{_e64} " # ps64.AsmOperands; + + if ps64.Pfl.IsRealTrue16 then { + def _e64#Gen.Suffix + : VOP3_Real_Gen, + VOP3e_t16_gfx11_gfx12<{0, op}, ps64.Pfl> { + let Inst{7-0} = ?; // sdst + let Inst{14} = 0; + let AsmString = asm_name # "{_e64} " # ps64.AsmOperands; + } + } else { + def _e64#Gen.Suffix + : VOP3_Real_Gen, + VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> { + let Inst{7-0} = ?; // sdst + let AsmString = asm_name # "{_e64} " # ps64.AsmOperands; + } } defm : VOPCXInstAliases; @@ -1695,14 +1749,25 @@ multiclass VOPCX_Real_with_name op, string OpName, if ps64.Pfl.HasExtVOP3DPP then { defvar psDPP = !cast(OpName#"_nosdst_e64"#"_dpp"); defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16; - def _e64_dpp#Gen.Suffix - : VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>, - SIMCInstr { - let AsmString = asm_name # "{_e64_dpp} " # AsmDPP; - } defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8; - def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> { - let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8; + if ps64.Pfl.IsRealTrue16 then { + def _e64_dpp#Gen.Suffix + : VOPC64_DPP16_NoDst_t16<{0, op}, psDPP, asm_name>, + SIMCInstr { + let AsmString = asm_name # "{_e64_dpp} " # AsmDPP; + } + def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst_t16<{0, op}, ps64, asm_name> { + let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8; + } + } else { + def _e64_dpp#Gen.Suffix + : VOPC64_DPP16_NoDst<{0, op}, psDPP, 
asm_name>, + SIMCInstr { + let AsmString = asm_name # "{_e64_dpp} " # AsmDPP; + } + def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> { + let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8; + } } } // End if ps64.Pfl.HasExtVOP3DPP } // End DecoderNamespace @@ -1756,11 +1821,23 @@ multiclass VOPCX_Real_t16_gfx11 op, string asm_name, string OpName = NAME, string pseudo_mnemonic = ""> : VOPCX_Real_t16; +multiclass VOPCX_Real_t16_and_fake16_gfx11 op, string asm_name, + string OpName = NAME, string pseudo_mnemonic = ""> { + defm _t16: VOPCX_Real_t16_gfx11; + defm _fake16: VOPCX_Real_t16_gfx11; +} + multiclass VOPCX_Real_t16_gfx11_gfx12 op, string asm_name, string OpName = NAME, string pseudo_mnemonic = ""> : VOPCX_Real_t16, VOPCX_Real_t16; +multiclass VOPCX_Real_t16_and_fake16_gfx11_gfx12 op, string asm_name, + string OpName = NAME, string pseudo_mnemonic = ""> { + defm _t16: VOPCX_Real_t16_gfx11_gfx12; + defm _fake16: VOPCX_Real_t16_gfx11_gfx12; +} + defm V_CMP_F_F16_fake16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">; defm V_CMP_LT_F16 : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x001, "v_cmp_lt_f16">; defm V_CMP_EQ_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">; @@ -1848,7 +1925,7 @@ defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>; defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>; defm V_CMPX_F_F16_fake16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">; -defm V_CMPX_LT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">; +defm V_CMPX_LT_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">; defm V_CMPX_EQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">; defm V_CMPX_LE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">; defm V_CMPX_GT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">; diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s index 462ad7ba6516d..60ec94446235e 100644 --- 
a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s @@ -1347,47 +1347,56 @@ v_cmpx_lg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x95,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_mirror +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l 
row_half_mirror +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:1 +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:15 +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:1 +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:15 +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:1 +// GFX11: 
v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:15 +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] 
-v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x19,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x0a,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x93,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX11: v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s index 46f1db837b0dd..fb2b28874bd04 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s @@ -326,17 +326,26 @@ v_cmpx_lg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cmpx_lg_f32_e64_dpp 
-|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x95,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x81,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x81,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x19,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x0a,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x93,0x81,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x91,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s index 371d29f2a2cb6..7a95d8cd53cde 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s @@ -2066,11 +2066,11 @@ v_cmpx_lg_f64_e64 -|src_scc|, -|exec| v_cmpx_lg_f64_e64 0xaf123456, -|vcc| clamp // GFX11: v_cmpx_lg_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa5,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] -v_cmpx_lt_f16_e64 v1, v2 -// GFX11: v_cmpx_lt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] +v_cmpx_lt_f16_e64 v1.l, v2.l +// GFX11: v_cmpx_lt_f16_e64 v1.l, v2.l ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] -v_cmpx_lt_f16_e64 v255, v255 -// GFX11: v_cmpx_lt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] +v_cmpx_lt_f16_e64 v255.l, v255.l +// GFX11: v_cmpx_lt_f16_e64 v255.l, v255.l ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] v_cmpx_lt_f16_e64 s1, s2 // GFX11: v_cmpx_lt_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x04,0x00,0x00] @@ -2111,6 +2111,12 @@ v_cmpx_lt_f16_e64 -src_scc, |vcc_lo| v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp // GFX11: v_cmpx_lt_f16_e64 
-|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_cmpx_lt_f16_e64 v1.h, v2.l +// GFX11: v_cmpx_lt_f16_e64 v1.h, v2.l ; encoding: [0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_f16_e64 v255.l, v255.h +// GFX11: v_cmpx_lt_f16_e64 v255.l, v255.h ; encoding: [0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00] + v_cmpx_lt_f32_e64 v1, v2 // GFX11: v_cmpx_lt_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s index 82c43e1a91b6a..42d7c5ea600b4 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s @@ -2054,50 +2054,65 @@ v_cmpx_lg_f64 src_scc, v[2:3] v_cmpx_lg_f64 0xaf123456, v[254:255] // GFX11: v_cmpx_lg_f64_e32 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x4b,0x7d,0x56,0x34,0x12,0xaf] -v_cmpx_lt_f16 v1, v2 -// GFX11: v_cmpx_lt_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x02,0x7d] +v_cmpx_lt_f16 v1.l, v2.l +// GFX11: v_cmpx_lt_f16_e32 v1.l, v2.l ; encoding: [0x01,0x05,0x02,0x7d] -v_cmpx_lt_f16 v127, v2 -// GFX11: v_cmpx_lt_f16_e32 v127, v2 ; encoding: [0x7f,0x05,0x02,0x7d] +v_cmpx_lt_f16 v127.l, v2.l +// GFX11: v_cmpx_lt_f16_e32 v127.l, v2.l ; encoding: [0x7f,0x05,0x02,0x7d] -v_cmpx_lt_f16 s1, v2 -// GFX11: v_cmpx_lt_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x02,0x7d] +v_cmpx_lt_f16 s1, v2.l +// GFX11: v_cmpx_lt_f16_e32 s1, v2.l ; encoding: [0x01,0x04,0x02,0x7d] -v_cmpx_lt_f16 s105, v2 -// GFX11: v_cmpx_lt_f16_e32 s105, v2 ; encoding: [0x69,0x04,0x02,0x7d] +v_cmpx_lt_f16 s105, v2.l +// GFX11: v_cmpx_lt_f16_e32 s105, v2.l ; encoding: [0x69,0x04,0x02,0x7d] -v_cmpx_lt_f16 vcc_lo, v2 -// GFX11: v_cmpx_lt_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x02,0x7d] +v_cmpx_lt_f16 vcc_lo, v2.l +// GFX11: v_cmpx_lt_f16_e32 vcc_lo, v2.l ; encoding: [0x6a,0x04,0x02,0x7d] -v_cmpx_lt_f16 vcc_hi, v2 -// GFX11: v_cmpx_lt_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x02,0x7d] +v_cmpx_lt_f16 vcc_hi, v2.l 
+// GFX11: v_cmpx_lt_f16_e32 vcc_hi, v2.l ; encoding: [0x6b,0x04,0x02,0x7d] -v_cmpx_lt_f16 ttmp15, v2 -// GFX11: v_cmpx_lt_f16_e32 ttmp15, v2 ; encoding: [0x7b,0x04,0x02,0x7d] +v_cmpx_lt_f16 ttmp15, v2.l +// GFX11: v_cmpx_lt_f16_e32 ttmp15, v2.l ; encoding: [0x7b,0x04,0x02,0x7d] -v_cmpx_lt_f16 m0, v2 -// GFX11: v_cmpx_lt_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x02,0x7d] +v_cmpx_lt_f16 m0, v2.l +// GFX11: v_cmpx_lt_f16_e32 m0, v2.l ; encoding: [0x7d,0x04,0x02,0x7d] -v_cmpx_lt_f16 exec_lo, v2 -// GFX11: v_cmpx_lt_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x02,0x7d] +v_cmpx_lt_f16 exec_lo, v2.l +// GFX11: v_cmpx_lt_f16_e32 exec_lo, v2.l ; encoding: [0x7e,0x04,0x02,0x7d] -v_cmpx_lt_f16 exec_hi, v2 -// GFX11: v_cmpx_lt_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x02,0x7d] +v_cmpx_lt_f16 exec_hi, v2.l +// GFX11: v_cmpx_lt_f16_e32 exec_hi, v2.l ; encoding: [0x7f,0x04,0x02,0x7d] -v_cmpx_lt_f16 null, v2 -// GFX11: v_cmpx_lt_f16_e32 null, v2 ; encoding: [0x7c,0x04,0x02,0x7d] +v_cmpx_lt_f16 null, v2.l +// GFX11: v_cmpx_lt_f16_e32 null, v2.l ; encoding: [0x7c,0x04,0x02,0x7d] -v_cmpx_lt_f16 -1, v2 -// GFX11: v_cmpx_lt_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x02,0x7d] +v_cmpx_lt_f16 -1, v2.l +// GFX11: v_cmpx_lt_f16_e32 -1, v2.l ; encoding: [0xc1,0x04,0x02,0x7d] -v_cmpx_lt_f16 0.5, v2 -// GFX11: v_cmpx_lt_f16_e32 0.5, v2 ; encoding: [0xf0,0x04,0x02,0x7d] +v_cmpx_lt_f16 0.5, v2.l +// GFX11: v_cmpx_lt_f16_e32 0.5, v2.l ; encoding: [0xf0,0x04,0x02,0x7d] -v_cmpx_lt_f16 src_scc, v2 -// GFX11: v_cmpx_lt_f16_e32 src_scc, v2 ; encoding: [0xfd,0x04,0x02,0x7d] +v_cmpx_lt_f16 src_scc, v2.l +// GFX11: v_cmpx_lt_f16_e32 src_scc, v2.l ; encoding: [0xfd,0x04,0x02,0x7d] -v_cmpx_lt_f16 0xfe0b, v127 -// GFX11: v_cmpx_lt_f16_e32 0xfe0b, v127 ; encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] +v_cmpx_lt_f16 0xfe0b, v127.l +// GFX11: v_cmpx_lt_f16_e32 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_f16 v1.h, v2.l +// GFX11: v_cmpx_lt_f16_e32 v1.h, v2.l ; encoding: 
[0x81,0x05,0x02,0x7d] + +v_cmpx_lt_f16 v127.h, v2.l +// GFX11: v_cmpx_lt_f16_e32 v127.h, v2.l ; encoding: [0xff,0x05,0x02,0x7d] + +v_cmpx_lt_f16 0.5, v127.l +// GFX11: v_cmpx_lt_f16_e32 0.5, v127.l ; encoding: [0xf0,0xfe,0x02,0x7d] + +v_cmpx_lt_f16 src_scc, v2.h +// GFX11: v_cmpx_lt_f16_e32 src_scc, v2.h ; encoding: [0xfd,0x04,0x03,0x7d] + +v_cmpx_lt_f16 0xfe0b, v127.h +// GFX11: v_cmpx_lt_f16_e32 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x03,0x7d,0x0b,0xfe,0x00,0x00] v_cmpx_lt_f32 v1, v2 // GFX11: v_cmpx_lt_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x22,0x7d] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s index b2ea4348f33b8..57185330971e1 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s @@ -1346,47 +1346,56 @@ v_cmpx_lg_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_cmpx_lg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_cmpx_lg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x2b,0x7d,0xff,0x6f,0xf5,0x30] -v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] +v_cmpx_lt_f16 v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16 v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] +v_cmpx_lt_f16 v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_cmpx_lt_f16 v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] -v_cmpx_lt_f16 v1, v2 row_mirror -// GFX11: v_cmpx_lt_f16 v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_mirror +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_half_mirror -// GFX11: v_cmpx_lt_f16 v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_half_mirror +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_shl:1 -// GFX11: v_cmpx_lt_f16 v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_shl:1 +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_shl:15 -// GFX11: v_cmpx_lt_f16 v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_shl:15 +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_shr:1 -// GFX11: v_cmpx_lt_f16 v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_shr:1 +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_shr:15 -// GFX11: v_cmpx_lt_f16 v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_shr:15 +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_ror:1 -// GFX11: v_cmpx_lt_f16 v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] +v_cmpx_lt_f16 
v1.l, v2.l row_ror:1 +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_ror:15 -// GFX11: v_cmpx_lt_f16 v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_ror:15 +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] +v_cmpx_lt_f16 v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] -v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x09,0x13] +v_cmpx_lt_f16 v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_cmpx_lt_f16 v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x09,0x13] -v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xf5,0x30] +v_cmpx_lt_f16 -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: 
v_cmpx_lt_f16 -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_lt_f16 v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cmpx_lt_f16 v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x5f,0x01,0x01] + +v_cmpx_lt_f16 v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_cmpx_lt_f16 v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x03,0x7d,0x81,0x60,0x09,0x13] + +v_cmpx_lt_f16 -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cmpx_lt_f16 -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x03,0x7d,0xff,0x6f,0xf5,0x30] v_cmpx_lt_f32 v1, v2 quad_perm:[3,2,1,0] // GFX11: v_cmpx_lt_f32 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s index b4c556cf0328a..e78840e08c497 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s @@ -290,14 +290,23 @@ v_cmpx_lg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cmpx_lg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_cmpx_lg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x2b,0x7d,0xff,0x00,0x00,0x00] -v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// 
GFX11: v_cmpx_lt_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] +v_cmpx_lt_f16 v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_cmpx_lt_f16 v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_lt_f16 v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16 v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x77,0x39,0x05] + +v_cmpx_lt_f16 v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cmpx_lt_f16 v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x03,0x7d,0x81,0x77,0x39,0x05] + +v_cmpx_lt_f16 v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cmpx_lt_f16 v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x03,0x7d,0xff,0x00,0x00,0x00] v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x22,0x7d,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s index ec628dd94f366..7c9fa7f846d47 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s v_cmpx_class_f16_e32 v1, v255 @@ -271,23 +271,41 @@ v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction 
-v_cmpx_lt_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_cmpx_lt_f16_e32 v1.h, v255.h +// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1.h, v255.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_cmpx_lt_f16_e32 v1.l, v255.l +// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1.l, v255.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.h, v2.h +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.h, v2.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.l, v2.l +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.l, v2.l quad_perm:[3,2,1,0] +// GFX11: 
:[[@LINE-1]]:19: error: invalid operand for instruction v_cmpx_lt_i16_e32 v1, v255 // GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s index 3bbdf3d3a903f..bffe5c7251ddf 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s v_cmpx_class_f16 v1, v255 @@ -271,23 +271,41 @@ v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0] // GFX11: v_cmpx_lg_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_lt_f16 v1, v255 -// GFX11: v_cmpx_lt_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xff,0x03,0x00] +v_cmpx_lt_f16 v1.h, v255.h +// GFX11: v_cmpx_lt_f16_e64 v1.h, v255.h ; encoding: [0x7e,0x18,0x81,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_f16 v1.h, v255.h quad_perm:[3,2,1,0] +// GFX11: 
v_cmpx_lt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x18,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16 v255, v2 -// GFX11: v_cmpx_lt_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0x05,0x02,0x00] +v_cmpx_lt_f16 v1.l, v255.l +// GFX11: v_cmpx_lt_f16_e64 v1.l, v255.l ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] +v_cmpx_lt_f16 v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] +v_cmpx_lt_f16 v1.l, v255.l quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f16 v255.h, v2.h +// GFX11: v_cmpx_lt_f16_e64 v255.h, v2.h ; encoding: [0x7e,0x18,0x81,0xd4,0xff,0x05,0x02,0x00] + +v_cmpx_lt_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_cmpx_lt_f16 v255.h, v2.h quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x18,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] + +v_cmpx_lt_f16 v255.l, v2.l +// GFX11: v_cmpx_lt_f16_e64 v255.l, v2.l ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0x05,0x02,0x00] + +v_cmpx_lt_f16 v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v255.l, 
v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_cmpx_lt_f16 v255.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] v_cmpx_lt_i16 v1, v255 // GFX11: v_cmpx_lt_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xff,0x03,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s index 476ea846f603a..6730482540060 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s @@ -1775,11 +1775,11 @@ v_cmpx_lg_f64_e64 -|src_scc|, -|exec| v_cmpx_lg_f64_e64 0xaf123456, -|vcc| clamp // GFX12: v_cmpx_lg_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa5,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] -v_cmpx_lt_f16_e64 v1, v2 -// GFX12: v_cmpx_lt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] +v_cmpx_lt_f16_e64 v1.l, v2.l +// GFX12: v_cmpx_lt_f16_e64 v1.l, v2.l ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] -v_cmpx_lt_f16_e64 v255, v255 -// GFX12: v_cmpx_lt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] +v_cmpx_lt_f16_e64 v255.l, v255.l +// GFX12: v_cmpx_lt_f16_e64 v255.l, v255.l ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] v_cmpx_lt_f16_e64 s1, s2 // GFX12: v_cmpx_lt_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x04,0x00,0x00] @@ -1820,6 +1820,12 @@ v_cmpx_lt_f16_e64 -src_scc, |vcc_lo| v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp // GFX12: v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_cmpx_lt_f16_e64 v1.h, v2.l +// GFX12: v_cmpx_lt_f16_e64 v1.h, v2.l ; encoding: [0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00] + +v_cmpx_lt_f16_e64 v255.l, v255.h +// GFX12: v_cmpx_lt_f16_e64 v255.l, v255.h ; encoding: [0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00] + 
v_cmpx_lt_f32_e64 v1, v2 // GFX12: v_cmpx_lt_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s index a6953ecc1d78a..2ffdf04ff886a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s @@ -1346,53 +1346,62 @@ v_cmpx_lg_f32_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x95,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, s2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, 2.0 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, 2.0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x81,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_mirror +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_half_mirror +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:1 +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:15 +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:1 +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:15 +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:1 +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:15 +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l 
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x19,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x0a,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x93,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s index 8e2899086f2bd..05bce2e0e61f2 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s @@ -452,23 +452,32 @@ v_cmpx_lg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x95,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16_e64_dpp -v1.l, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f16_e64_dpp -v1.l, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_cmpx_lt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16_e64_dpp -v1.l, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f16_e64_dpp -v1.l, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x02,0x81,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x81,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x83,0x81,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x7e,0x19,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x0a,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x93,0x81,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x91,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s index 300c748145141..1392b9b8112f9 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s @@ -1766,50 +1766,62 @@ v_cmpx_lg_f64 src_scc, v[2:3] v_cmpx_lg_f64 0xaf123456, v[254:255] // GFX12: v_cmpx_lg_f64_e32 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x4b,0x7d,0x56,0x34,0x12,0xaf] -v_cmpx_lt_f16 v1, v2 -// GFX12: v_cmpx_lt_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x02,0x7d] +v_cmpx_lt_f16 v1.l, v2.l +// GFX12: v_cmpx_lt_f16_e32 v1.l, v2.l ; encoding: [0x01,0x05,0x02,0x7d] -v_cmpx_lt_f16 v127, v2 -// GFX12: v_cmpx_lt_f16_e32 v127, v2 ; encoding: [0x7f,0x05,0x02,0x7d] +v_cmpx_lt_f16 v127.l, v2.l +// GFX12: v_cmpx_lt_f16_e32 v127.l, v2.l ; encoding: [0x7f,0x05,0x02,0x7d] -v_cmpx_lt_f16 s1, v2 -// GFX12: v_cmpx_lt_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x02,0x7d] +v_cmpx_lt_f16 s1, v2.l +// GFX12: v_cmpx_lt_f16_e32 s1, v2.l ; encoding: [0x01,0x04,0x02,0x7d] -v_cmpx_lt_f16 s105, v2 -// GFX12: v_cmpx_lt_f16_e32 s105, v2 ; encoding: [0x69,0x04,0x02,0x7d] +v_cmpx_lt_f16 s105, v2.l +// GFX12: v_cmpx_lt_f16_e32 s105, v2.l ; encoding: [0x69,0x04,0x02,0x7d] -v_cmpx_lt_f16 vcc_lo, v2 -// GFX12: v_cmpx_lt_f16_e32 vcc_lo, v2 ; encoding: 
[0x6a,0x04,0x02,0x7d] +v_cmpx_lt_f16 vcc_lo, v2.l +// GFX12: v_cmpx_lt_f16_e32 vcc_lo, v2.l ; encoding: [0x6a,0x04,0x02,0x7d] -v_cmpx_lt_f16 vcc_hi, v2 -// GFX12: v_cmpx_lt_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x02,0x7d] +v_cmpx_lt_f16 vcc_hi, v2.l +// GFX12: v_cmpx_lt_f16_e32 vcc_hi, v2.l ; encoding: [0x6b,0x04,0x02,0x7d] -v_cmpx_lt_f16 ttmp15, v2 -// GFX12: v_cmpx_lt_f16_e32 ttmp15, v2 ; encoding: [0x7b,0x04,0x02,0x7d] +v_cmpx_lt_f16 ttmp15, v2.l +// GFX12: v_cmpx_lt_f16_e32 ttmp15, v2.l ; encoding: [0x7b,0x04,0x02,0x7d] -v_cmpx_lt_f16 m0, v2 -// GFX12: v_cmpx_lt_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x02,0x7d] +v_cmpx_lt_f16 m0, v2.l +// GFX12: v_cmpx_lt_f16_e32 m0, v2.l ; encoding: [0x7d,0x04,0x02,0x7d] -v_cmpx_lt_f16 exec_lo, v2 -// GFX12: v_cmpx_lt_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x02,0x7d] +v_cmpx_lt_f16 exec_lo, v2.l +// GFX12: v_cmpx_lt_f16_e32 exec_lo, v2.l ; encoding: [0x7e,0x04,0x02,0x7d] -v_cmpx_lt_f16 exec_hi, v2 -// GFX12: v_cmpx_lt_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x02,0x7d] +v_cmpx_lt_f16 exec_hi, v2.l +// GFX12: v_cmpx_lt_f16_e32 exec_hi, v2.l ; encoding: [0x7f,0x04,0x02,0x7d] -v_cmpx_lt_f16 null, v2 -// GFX12: v_cmpx_lt_f16_e32 null, v2 ; encoding: [0x7c,0x04,0x02,0x7d] +v_cmpx_lt_f16 null, v2.l +// GFX12: v_cmpx_lt_f16_e32 null, v2.l ; encoding: [0x7c,0x04,0x02,0x7d] -v_cmpx_lt_f16 -1, v2 -// GFX12: v_cmpx_lt_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x02,0x7d] +v_cmpx_lt_f16 -1, v2.l +// GFX12: v_cmpx_lt_f16_e32 -1, v2.l ; encoding: [0xc1,0x04,0x02,0x7d] -v_cmpx_lt_f16 0.5, v2 -// GFX12: v_cmpx_lt_f16_e32 0.5, v2 ; encoding: [0xf0,0x04,0x02,0x7d] +v_cmpx_lt_f16 0.5, v2.l +// GFX12: v_cmpx_lt_f16_e32 0.5, v2.l ; encoding: [0xf0,0x04,0x02,0x7d] -v_cmpx_lt_f16 src_scc, v2 -// GFX12: v_cmpx_lt_f16_e32 src_scc, v2 ; encoding: [0xfd,0x04,0x02,0x7d] +v_cmpx_lt_f16 src_scc, v2.l +// GFX12: v_cmpx_lt_f16_e32 src_scc, v2.l ; encoding: [0xfd,0x04,0x02,0x7d] -v_cmpx_lt_f16 0xfe0b, v127 -// GFX12: v_cmpx_lt_f16_e32 0xfe0b, v127 ; 
encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] +v_cmpx_lt_f16 0xfe0b, v127.l +// GFX12: v_cmpx_lt_f16_e32 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_f16 v1.h, v2.l +// GFX12: v_cmpx_lt_f16_e32 v1.h, v2.l ; encoding: [0x81,0x05,0x02,0x7d] + +v_cmpx_lt_f16 v127.h, v2.l +// GFX12: v_cmpx_lt_f16_e32 v127.h, v2.l ; encoding: [0xff,0x05,0x02,0x7d] + +v_cmpx_lt_f16 src_scc, v2.h +// GFX12: v_cmpx_lt_f16_e32 src_scc, v2.h ; encoding: [0xfd,0x04,0x03,0x7d] + +v_cmpx_lt_f16 0xfe0b, v127.h +// GFX12: v_cmpx_lt_f16_e32 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x03,0x7d,0x0b,0xfe,0x00,0x00] v_cmpx_lt_f32 v1, v2 // GFX12: v_cmpx_lt_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x22,0x7d] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s index 857d6267a215f..c8f9835181837 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s @@ -1178,47 +1178,53 @@ v_cmpx_lg_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_cmpx_lg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cmpx_lg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x2b,0x7d,0xff,0x6f,0xf5,0x30] -v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] +v_cmpx_lt_f16 v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16 v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] +v_cmpx_lt_f16 v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_cmpx_lt_f16 v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] -v_cmpx_lt_f16 v1, v2 row_mirror -// GFX12: v_cmpx_lt_f16 v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_mirror +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_half_mirror -// GFX12: v_cmpx_lt_f16 v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_half_mirror +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_shl:1 -// GFX12: v_cmpx_lt_f16 v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_shl:1 +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_shl:15 -// GFX12: v_cmpx_lt_f16 v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_shl:15 +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_shr:1 -// GFX12: v_cmpx_lt_f16 v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_shr:1 +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_shr:15 -// GFX12: v_cmpx_lt_f16 v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_shr:15 +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] 
-v_cmpx_lt_f16 v1, v2 row_ror:1 -// GFX12: v_cmpx_lt_f16 v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_ror:1 +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_ror:15 -// GFX12: v_cmpx_lt_f16 v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_ror:15 +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] +v_cmpx_lt_f16 v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] -v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] +v_cmpx_lt_f16 v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] -v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x09,0x13] +v_cmpx_lt_f16 v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX12: v_cmpx_lt_f16 v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x09,0x13] -v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xf5,0x30] +v_cmpx_lt_f16 -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX12: v_cmpx_lt_f16 -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_lt_f16 v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cmpx_lt_f16 v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x03,0x7d,0x81,0x60,0x09,0x13] + +v_cmpx_lt_f16 -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cmpx_lt_f16 -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0x03,0x7d,0xff,0x6f,0xf5,0x30] v_cmpx_lt_f32 v1, v2 quad_perm:[3,2,1,0] // GFX12: v_cmpx_lt_f32 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s index 8ee6b7d488fdf..3e7922d2acbda 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s @@ -254,14 +254,20 @@ v_cmpx_lg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cmpx_lg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cmpx_lg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x2b,0x7d,0xff,0x00,0x00,0x00] -v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f16 v1.l, v2.l 
dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] +v_cmpx_lt_f16 v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] +// GFX12: v_cmpx_lt_f16 v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_lt_f16 v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cmpx_lt_f16 v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x03,0x7d,0x81,0x77,0x39,0x05] + +v_cmpx_lt_f16 v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cmpx_lt_f16 v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0x03,0x7d,0xff,0x00,0x00,0x00] v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x22,0x7d,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s index 5019324d174b8..cb317443d2828 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s v_cmpx_class_f16_e32 v1, v255 @@ -253,23 +253,41 @@ v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0] // GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_cmpx_lt_f16_e32 v1.h, v255.h +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction 
-v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1.h, v255.h quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_cmpx_lt_f16_e32 v1.l, v255.l +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1.l, v255.l quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.h, v2.h +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.h, v2.h quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.l, v2.l +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cmpx_lt_f16_e32 v255.l, v2.l quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction v_cmpx_lt_i16_e32 v1, v255 // GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git 
a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s index 4f462861e3a0b..f3278c826475a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 %s v_cmpx_class_f16 v1, v255 @@ -253,23 +253,41 @@ v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0] // GFX12: v_cmpx_lg_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_lt_f16 v1, v255 -// GFX12: v_cmpx_lt_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xff,0x03,0x00] +v_cmpx_lt_f16 v1.h, v255.h +// GFX12: v_cmpx_lt_f16_e64 v1.h, v255.h ; encoding: [0x7e,0x18,0x81,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_f16 v1.h, v255.h quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x18,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] 
-v_cmpx_lt_f16 v255, v2 -// GFX12: v_cmpx_lt_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0x05,0x02,0x00] +v_cmpx_lt_f16 v1.l, v255.l +// GFX12: v_cmpx_lt_f16_e64 v1.l, v255.l ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] +v_cmpx_lt_f16 v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] +v_cmpx_lt_f16 v1.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f16 v255.h, v2.h +// GFX12: v_cmpx_lt_f16_e64 v255.h, v2.h ; encoding: [0x7e,0x18,0x81,0xd4,0xff,0x05,0x02,0x00] + +v_cmpx_lt_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_cmpx_lt_f16 v255.h, v2.h quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x18,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] + +v_cmpx_lt_f16 v255.l, v2.l +// GFX12: v_cmpx_lt_f16_e64 v255.l, v2.l ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0x05,0x02,0x00] + +v_cmpx_lt_f16 v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_cmpx_lt_f16 v255.l, v2.l quad_perm:[3,2,1,0] +// GFX12: 
v_cmpx_lt_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] v_cmpx_lt_i16 v1, v255 // GFX12: v_cmpx_lt_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xff,0x03,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopcx.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopcx.txt index 4ce26199bcc08..ab5f0af5f6629 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopcx.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopcx.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s 0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff # 
GFX11: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -1349,46 +1349,72 @@ # GFX11: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x83,0x95,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] 0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x7e,0x19,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x19,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x7e,0x0a,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x0a,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0x7e,0x93,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x93,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff # GFX11: v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopcx.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopcx.txt index a703568f5c6f2..6867126e9c70e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopcx.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopcx.txt @@ -1,8 +1,8 @@ # NOTE: Assertions 
have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s 0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -269,16 +269,32 @@ # GFX11: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x83,0x95,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
+# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX11: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x02,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX11: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x02,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x02,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x02,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0x7e,0x83,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX11: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +0x7e,0x19,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x19,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +0x7e,0x0a,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x0a,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x02,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +0x7e,0x93,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# GFX11-REAL16: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x93,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x7e,0x00,0x91,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x91,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopcx.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopcx.txt index d519c0ffa66c6..b9d7a5296cc5e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopcx.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopcx.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble 
-show-encoding < %s | FileCheck -check-prefixes=GFX11 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s 0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00 # GFX11: v_cmpx_class_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00] @@ -2069,10 +2069,12 @@ # GFX11: v_cmpx_lg_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa5,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] 0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00 -# GFX11: v_cmpx_lt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] +# GFX11-REAL16: v_cmpx_lt_f16_e64 v1.l, v2.l ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] +# GFX11-FAKE16: v_cmpx_lt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] 0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00 -# GFX11: v_cmpx_lt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] +# GFX11-REAL16: v_cmpx_lt_f16_e64 v255.l, v255.l ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] +# GFX11-FAKE16: v_cmpx_lt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] 0x7e,0x00,0x81,0xd4,0x01,0x04,0x00,0x00 # GFX11: v_cmpx_lt_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x04,0x00,0x00] @@ -2113,6 +2115,14 @@ 0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 # GFX11: v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] 
+0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00 +# GFX11-REAL16: v_cmpx_lt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00] +# GFX11-FAKE16: v_cmpx_lt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] + +0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00 +# GFX11-REAL16: v_cmpx_lt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00] +# GFX11-FAKE16: v_cmpx_lt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] + 0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x00 # GFX11: v_cmpx_lt_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt index e0b5c16c27d2f..913e753627581 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-REAL16 +# RUN: llvm-mc 
-triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16 0x01,0x05,0xfa,0x7d # GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d] @@ -2057,49 +2057,84 @@ # GFX11: v_cmpx_lg_f64_e32 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x4b,0x7d,0x56,0x34,0x12,0xaf] 0x01,0x05,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 v1.l, v2.l ; encoding: [0x01,0x05,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x02,0x7d] 0x7f,0x05,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 v127, v2 ; encoding: [0x7f,0x05,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 v127.l, v2.l ; encoding: [0x7f,0x05,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 v127, v2 ; encoding: [0x7f,0x05,0x02,0x7d] 0x01,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 s1, v2.l ; encoding: [0x01,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x02,0x7d] 0x69,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 s105, v2 ; encoding: [0x69,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 s105, v2.l ; encoding: [0x69,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 s105, v2 ; encoding: [0x69,0x04,0x02,0x7d] 0x6a,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 vcc_lo, v2.l ; encoding: [0x6a,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x02,0x7d] 0x6b,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 vcc_hi, v2.l ; encoding: [0x6b,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 vcc_hi, v2 
; encoding: [0x6b,0x04,0x02,0x7d] 0x7b,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 ttmp15, v2 ; encoding: [0x7b,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 ttmp15, v2.l ; encoding: [0x7b,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 ttmp15, v2 ; encoding: [0x7b,0x04,0x02,0x7d] 0x7d,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 m0, v2.l ; encoding: [0x7d,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x02,0x7d] 0x7e,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 exec_lo, v2.l ; encoding: [0x7e,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x02,0x7d] 0x7f,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 exec_hi, v2.l ; encoding: [0x7f,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x02,0x7d] 0x7c,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 null, v2 ; encoding: [0x7c,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 null, v2.l ; encoding: [0x7c,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 null, v2 ; encoding: [0x7c,0x04,0x02,0x7d] 0xc1,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 -1, v2.l ; encoding: [0xc1,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x02,0x7d] 0xf0,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 0.5, v2 ; encoding: [0xf0,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 0.5, v2.l ; encoding: [0xf0,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 0.5, v2 ; encoding: [0xf0,0x04,0x02,0x7d] 0xfd,0x04,0x02,0x7d -# GFX11: v_cmpx_lt_f16_e32 src_scc, v2 ; encoding: [0xfd,0x04,0x02,0x7d] +# GFX11-REAL16: v_cmpx_lt_f16_e32 src_scc, v2.l ; encoding: [0xfd,0x04,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 
src_scc, v2 ; encoding: [0xfd,0x04,0x02,0x7d] 0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00 -# GFX11: v_cmpx_lt_f16_e32 0xfe0b, v127 ; encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] +# GFX11-REAL16: v_cmpx_lt_f16_e32 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 0xfe0b, v127 ; encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] + +0x81,0x05,0x02,0x7d +# GFX11-REAL16: v_cmpx_lt_f16_e32 v1.h, v2.l ; encoding: [0x81,0x05,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x02,0x7d] + +0xff,0x05,0x02,0x7d +# GFX11-REAL16: v_cmpx_lt_f16_e32 v127.h, v2.l ; encoding: [0xff,0x05,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x02,0x7d] + +0xf0,0xfe,0x02,0x7d +# GFX11-REAL16: v_cmpx_lt_f16_e32 0.5, v127.l ; encoding: [0xf0,0xfe,0x02,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 0.5, v127 ; encoding: [0xf0,0xfe,0x02,0x7d] + +0xfd,0x04,0x03,0x7d +# GFX11-REAL16: v_cmpx_lt_f16_e32 src_scc, v2.h ; encoding: [0xfd,0x04,0x03,0x7d] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x03,0x7d] + +0xff,0xfe,0x03,0x7d,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_cmpx_lt_f16_e32 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x03,0x7d,0x0b,0xfe,0x00,0x00] +# GFX11-FAKE16: v_cmpx_lt_f16_e32 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x03,0x7d,0x0b,0xfe,0x00,0x00] 0x01,0x05,0x22,0x7d # GFX11: v_cmpx_lt_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x22,0x7d] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt index 1d7e82c8bf96f..8919d86071f4d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt +++ 
b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16 0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff # GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] @@ -1349,46 +1349,72 @@ # GFX11: v_cmpx_lg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x2b,0x7d,0xff,0x6f,0xfd,0x30] 0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 
v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] 
0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] 
+# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff -# GFX11: v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01 -# GFX11: v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x02,0x7d,0x01,0x60,0x01,0x13 -# GFX11: v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x01,0x13] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x01,0x13] 0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xfd,0x30 -# GFX11: v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xfd,0x30] +# GFX11-REAL16: v_cmpx_lt_f16 -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xfd,0x30] + +0xfa,0xfe,0x02,0x7d,0x7f,0x5f,0x01,0x01 +# GFX11-REAL16: 
v_cmpx_lt_f16 v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x5f,0x01,0x01] +# GFX11-FAKE16: v_cmpx_lt_f16 v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x5f,0x01,0x01] + +0xfa,0x04,0x03,0x7d,0x81,0x60,0x01,0x13 +# GFX11-REAL16: v_cmpx_lt_f16 v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x03,0x7d,0x81,0x60,0x01,0x13] +# GFX11-FAKE16: v_cmpx_lt_f16 v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x03,0x7d,0x81,0x60,0x01,0x13] + +0xfa,0xfe,0x03,0x7d,0xff,0x6f,0xfd,0x30 +# GFX11-REAL16: v_cmpx_lt_f16 -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x03,0x7d,0xff,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_cmpx_lt_f16 -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x03,0x7d,0xff,0x6f,0xfd,0x30] 0xfa,0x04,0x22,0x7d,0x01,0x1b,0x00,0xff # GFX11: v_cmpx_lt_f32 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt index a6d8ec95d6d63..867fd7374b788 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 
-mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16 0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05 # GFX11: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] @@ -197,10 +197,24 @@ # GFX11: v_cmpx_lg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x2b,0x7d,0xff,0x00,0x00,0x00] 0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05 -# GFX11: v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_cmpx_lt_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] 0xea,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00 -# GFX11: v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] +# GFX11-REAL16: v_cmpx_lt_f16 v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xea,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] + +0xe9,0xfe,0x02,0x7d,0x7f,0x77,0x39,0x05 +# GFX11-REAL16: v_cmpx_lt_f16 v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x77,0x39,0x05] +# GFX11-FAKE16: v_cmpx_lt_f16 v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x77,0x39,0x05] + +0xe9,0x04,0x03,0x7d,0x81,0x77,0x39,0x05 +# GFX11-REAL16: v_cmpx_lt_f16 v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x03,0x7d,0x81,0x77,0x39,0x05] +# GFX11-FAKE16: v_cmpx_lt_f16 v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x03,0x7d,0x81,0x77,0x39,0x05] + +0xea,0xfe,0x03,0x7d,0xff,0x00,0x00,0x00 +# GFX11-REAL16: v_cmpx_lt_f16 v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x03,0x7d,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_cmpx_lt_f16 v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x03,0x7d,0xff,0x00,0x00,0x00] 0xe9,0x04,0x22,0x7d,0x01,0x77,0x39,0x05 # GFX11: v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x22,0x7d,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt index 25c4e4ad43b1b..3ccf6feac4cca 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt @@ -2199,7 +2199,6 @@ # GFX12-REAL16: v_fract_f16_e64 v255.h, -|0xfe0b| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] # GFX12-FAKE16: v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: 
[0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -# GFX11: v_fract_f16_e64 v5.h, v1.h op_sel:[1,1] ; encoding: [0x05,0x48,0xdf,0xd5,0x01,0x01,0x00,0x00] 0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00 # GFX12: v_fract_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] @@ -2504,7 +2503,6 @@ # GFX12-REAL16: v_frexp_mant_f16_e64 v255.h, -|0xfe0b| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] # GFX12-FAKE16: v_frexp_mant_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -# GFX11: v_frexp_mant_f16_e64 v5.h, v1.h op_sel:[1,1] ; encoding: [0x05,0x48,0xd9,0xd5,0x01,0x01,0x00,0x00] 0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00 # GFX12: v_frexp_mant_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt index f447fb42afc7b..a020b0ae46a37 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt @@ -1794,7 +1794,6 @@ # GFX12-REAL16: v_fract_f16_e64_dpp v255.h, -|v255.l| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] # GFX12-FAKE16: v_fract_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -# GFX11: v_fract_f16_e64_dpp v5.h, v1.h op_sel:[1,1] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x48,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] 0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff # GFX12: v_fract_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] @@ -2013,7 +2012,6 @@ # GFX12-REAL16: v_frexp_mant_f16_e64_dpp v255.h, -|v255.l| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] # GFX12-FAKE16: v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -# GFX11: v_frexp_mant_f16_e64_dpp v5.h, v1.h op_sel:[1,1] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x48,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] 0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff # GFX12: v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt index 7cf415aad5a19..ad3c673b4e390 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt @@ -528,7 +528,6 @@ # GFX12-REAL16: v_fract_f16_e64_dpp v255.h, -|v255.l| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0xdf,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] # GFX12-FAKE16: v_fract_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdf,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -# GFX11: v_fract_f16_e64_dpp v5.h, v1.h op_sel:[1,1] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] 0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 # GFX12: v_fract_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] @@ -593,7 +592,6 @@ # GFX12-REAL16: 
v_frexp_mant_f16_e64_dpp v255.h, -|v255.l| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0xd9,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] # GFX12-FAKE16: v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd9,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -# GFX11: v_frexp_mant_f16_e64_dpp v5.h, v1.h op_sel:[1,1] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x48,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] 0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 # GFX12: v_frexp_mant_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx.txt index 46f255c2f484f..55e0bf6c525ec 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s +# RUN: llvm-mc 
-triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s 0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00 # GFX12: v_cmpx_class_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00] @@ -1778,10 +1778,12 @@ # GFX12: v_cmpx_lg_f64_e64 0xaf123456, -|vcc| clamp ; encoding: [0x7e,0x82,0xa5,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] 0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00 -# GFX12: v_cmpx_lt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] +# GFX12-REAL16: v_cmpx_lt_f16_e64 v1.l, v2.l ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] 0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00 -# GFX12: v_cmpx_lt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] +# GFX12-REAL16: v_cmpx_lt_f16_e64 v255.l, v255.l ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] 0x7e,0x00,0x81,0xd4,0x01,0x04,0x00,0x00 # GFX12: v_cmpx_lt_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x04,0x00,0x00] @@ -1822,6 +1824,14 @@ 0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 # GFX12: v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00 +# GFX12-REAL16: v_cmpx_lt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] + +0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00 +# GFX12-REAL16: v_cmpx_lt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: 
[0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] + 0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x00 # GFX12: v_cmpx_lt_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt index 3550b6fc5e95d..041e43f4d05e5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s 0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff # GFX12: v_cmpx_class_f16_e64_dpp v1, 
v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -1268,49 +1268,123 @@ # GFX12: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x83,0x95,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 
0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: 
v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, s3 row_ror:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] 0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX12: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00 +# GFX12-REAL16: v_cmpx_lt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x05,0x02,0x00] + +0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00 +# GFX12-REAL16: v_cmpx_lt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16_e64 v255, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0xff,0x03,0x00] + +0x7e,0x00,0x81,0xd4,0x01,0x04,0x00,0x00 +# GFX12: v_cmpx_lt_f16_e64 s1, s2 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0x04,0x00,0x00] + +0x7e,0x00,0x81,0xd4,0x69,0xd2,0x00,0x00 +# GFX12: v_cmpx_lt_f16_e64 s105, s105 ; encoding: [0x7e,0x00,0x81,0xd4,0x69,0xd2,0x00,0x00] + +0x7e,0x00,0x81,0xd4,0x6a,0xf6,0x00,0x00 +# GFX12: v_cmpx_lt_f16_e64 vcc_lo, ttmp15 ; encoding: [0x7e,0x00,0x81,0xd4,0x6a,0xf6,0x00,0x00] + +0x7e,0x00,0x81,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 +# GFX12: v_cmpx_lt_f16_e64 vcc_hi, 0xfe0b ; encoding: [0x7e,0x00,0x81,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +0x7e,0x00,0x81,0xd4,0x7b,0xfa,0x01,0x00 +# GFX12: v_cmpx_lt_f16_e64 ttmp15, src_scc ; encoding: [0x7e,0x00,0x81,0xd4,0x7b,0xfa,0x01,0x00] + +0x7e,0x00,0x81,0xd4,0x7d,0xe0,0x01,0x00 +# GFX12: v_cmpx_lt_f16_e64 m0, 0.5 ; encoding: [0x7e,0x00,0x81,0xd4,0x7d,0xe0,0x01,0x00] + +0x7e,0x00,0x81,0xd4,0x7e,0x82,0x01,0x00 +# GFX12: v_cmpx_lt_f16_e64 exec_lo, -1 ; encoding: 
[0x7e,0x00,0x81,0xd4,0x7e,0x82,0x01,0x00] + +0x7e,0x01,0x81,0xd4,0x7f,0xf8,0x00,0x00 +# GFX12: v_cmpx_lt_f16_e64 |exec_hi|, null ; encoding: [0x7e,0x01,0x81,0xd4,0x7f,0xf8,0x00,0x00] + +0x7e,0x00,0x81,0xd4,0x7c,0xfc,0x00,0x00 +# GFX12: v_cmpx_lt_f16_e64 null, exec_lo ; encoding: [0x7e,0x00,0x81,0xd4,0x7c,0xfc,0x00,0x00] + +0x7e,0x00,0x81,0xd4,0xc1,0xfe,0x00,0x00 +# GFX12: v_cmpx_lt_f16_e64 -1, exec_hi ; encoding: [0x7e,0x00,0x81,0xd4,0xc1,0xfe,0x00,0x00] + +0x7e,0x00,0x81,0xd4,0xf0,0xfa,0x00,0x40 +# GFX12: v_cmpx_lt_f16_e64 0.5, -m0 ; encoding: [0x7e,0x00,0x81,0xd4,0xf0,0xfa,0x00,0x40] + +0x7e,0x02,0x81,0xd4,0xfd,0xd4,0x00,0x20 +# GFX12: v_cmpx_lt_f16_e64 -src_scc, |vcc_lo| ; encoding: [0x7e,0x02,0x81,0xd4,0xfd,0xd4,0x00,0x20] + +0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 +# GFX12: v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +0x7e,0x19,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x19,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x81,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x7e,0x0a,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x0a,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x02,0x81,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0x7e,0x93,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0x7e,0x93,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff # GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt index 9442dcc4fb1d5..35e7a45a7b162 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 
-mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s 0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX12: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -323,19 +323,36 @@ # GFX12: v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x83,0x95,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x81,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_cmpx_lt_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp v1.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] 0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX12: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 
0x7e,0x02,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX12: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x02,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x02,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x02,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0x7e,0x83,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX12: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.l| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +0x7e,0x19,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x19,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +0x7e,0x0a,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x0a,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x02,0x81,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +0x7e,0x93,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# GFX12-REAL16: v_cmpx_lt_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0x7e,0x93,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x83,0x81,0xd4,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x7e,0x00,0x91,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x91,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt index 180ec987280d1..9e5959ca4a77e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-FAKE16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 
-mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-FAKE16 0x01,0x05,0xfa,0x7d # GFX12: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d] @@ -1769,49 +1769,80 @@ # GFX12: v_cmpx_lg_f64_e32 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0x4b,0x7d,0x56,0x34,0x12,0xaf] 0x01,0x05,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 v1.l, v2.l ; encoding: [0x01,0x05,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 v1, v2 ; encoding: [0x01,0x05,0x02,0x7d] 0x7f,0x05,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 v127, v2 ; encoding: [0x7f,0x05,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 v127.l, v2.l ; encoding: [0x7f,0x05,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 v127, v2 ; encoding: [0x7f,0x05,0x02,0x7d] 0x01,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 s1, v2.l ; encoding: [0x01,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 s1, v2 ; encoding: [0x01,0x04,0x02,0x7d] 0x69,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 s105, v2 ; encoding: [0x69,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 s105, v2.l ; encoding: [0x69,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 s105, v2 ; encoding: [0x69,0x04,0x02,0x7d] 0x6a,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 vcc_lo, v2.l ; encoding: [0x6a,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 vcc_lo, v2 ; encoding: [0x6a,0x04,0x02,0x7d] 0x6b,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 vcc_hi, v2.l ; encoding: [0x6b,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 vcc_hi, v2 ; encoding: [0x6b,0x04,0x02,0x7d] 0x7b,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 ttmp15, v2 ; encoding: [0x7b,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 ttmp15, v2.l ; encoding: 
[0x7b,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 ttmp15, v2 ; encoding: [0x7b,0x04,0x02,0x7d] 0x7d,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 m0, v2.l ; encoding: [0x7d,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 m0, v2 ; encoding: [0x7d,0x04,0x02,0x7d] 0x7e,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 exec_lo, v2.l ; encoding: [0x7e,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 exec_lo, v2 ; encoding: [0x7e,0x04,0x02,0x7d] 0x7f,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 exec_hi, v2.l ; encoding: [0x7f,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 exec_hi, v2 ; encoding: [0x7f,0x04,0x02,0x7d] 0x7c,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 null, v2 ; encoding: [0x7c,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 null, v2.l ; encoding: [0x7c,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 null, v2 ; encoding: [0x7c,0x04,0x02,0x7d] 0xc1,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 -1, v2.l ; encoding: [0xc1,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 -1, v2 ; encoding: [0xc1,0x04,0x02,0x7d] 0xf0,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 0.5, v2 ; encoding: [0xf0,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 0.5, v2.l ; encoding: [0xf0,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 0.5, v2 ; encoding: [0xf0,0x04,0x02,0x7d] 0xfd,0x04,0x02,0x7d -# GFX12: v_cmpx_lt_f16_e32 src_scc, v2 ; encoding: [0xfd,0x04,0x02,0x7d] +# GFX12-REAL16: v_cmpx_lt_f16_e32 src_scc, v2.l ; encoding: [0xfd,0x04,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 src_scc, v2 ; encoding: [0xfd,0x04,0x02,0x7d] 0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00 -# GFX12: v_cmpx_lt_f16_e32 0xfe0b, v127 ; encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] +# 
GFX12-REAL16: v_cmpx_lt_f16_e32 0xfe0b, v127.l ; encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 0xfe0b, v127 ; encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] + +0x81,0x05,0x02,0x7d +# GFX12-REAL16: v_cmpx_lt_f16_e32 v1.h, v2.l ; encoding: [0x81,0x05,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x02,0x7d] + +0xff,0x05,0x02,0x7d +# GFX12-REAL16: v_cmpx_lt_f16_e32 v127.h, v2.l ; encoding: [0xff,0x05,0x02,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x02,0x7d] + +0xfd,0x04,0x03,0x7d +# GFX12-REAL16: v_cmpx_lt_f16_e32 src_scc, v2.h ; encoding: [0xfd,0x04,0x03,0x7d] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 src_scc, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xfd,0x04,0x03,0x7d] + +0xff,0xfe,0x03,0x7d,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_cmpx_lt_f16_e32 0xfe0b, v127.h ; encoding: [0xff,0xfe,0x03,0x7d,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16_e32 0xfe0b, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ ; encoding: [0xff,0xfe,0x03,0x7d,0x0b,0xfe,0x00,0x00] 0x01,0x05,0x22,0x7d # GFX12: v_cmpx_lt_f32_e32 v1, v2 ; encoding: [0x01,0x05,0x22,0x7d] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt index e65d451116d29..8ecef5536ad79 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 
-mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-FAKE16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-FAKE16 0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff # GFX12: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] @@ -1181,46 +1181,68 @@ # GFX12: v_cmpx_lg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x2b,0x7d,0xff,0x6f,0xfd,0x30] 0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf 
; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff -# GFX12: v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] 0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01 -# GFX12: v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x02,0x7d,0x01,0x60,0x01,0x13 -# GFX12: v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x01,0x13] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x01,0x13] 0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xfd,0x30 -# GFX12: v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xfd,0x30] +# GFX12-REAL16: v_cmpx_lt_f16 -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xfd,0x30] + +0xfa,0x04,0x03,0x7d,0x81,0x60,0x01,0x13 +# GFX12-REAL16: v_cmpx_lt_f16 v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x03,0x7d,0x81,0x60,0x01,0x13] +# GFX12-FAKE16: v_cmpx_lt_f16 v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0xfa,0x04,0x03,0x7d,0x81,0x60,0x01,0x13] + +0xfa,0xfe,0x03,0x7d,0xff,0x6f,0xfd,0x30 +# GFX12-REAL16: v_cmpx_lt_f16 -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x03,0x7d,0xff,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_cmpx_lt_f16 -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/|, -|v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0x03,0x7d,0xff,0x6f,0xfd,0x30] 0xfa,0x04,0x22,0x7d,0x01,0x1b,0x00,0xff # GFX12: v_cmpx_lt_f32 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt index 4449cbcfb3608..147084df5384f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble 
-show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-REAL16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-FAKE16 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=GFX12,GFX12-FAKE16 0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05 # GFX12: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] @@ -173,10 +173,20 @@ # GFX12: v_cmpx_lg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x2b,0x7d,0xff,0x00,0x00,0x00] 0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05 -# GFX12: v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] +# GFX12-REAL16: v_cmpx_lt_f16 v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] 0xea,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00 -# GFX12: v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] +# GFX12-REAL16: v_cmpx_lt_f16 v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] + +0xe9,0x04,0x03,0x7d,0x81,0x77,0x39,0x05 +# GFX12-REAL16: v_cmpx_lt_f16 v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x03,0x7d,0x81,0x77,0x39,0x05] +# GFX12-FAKE16: v_cmpx_lt_f16 v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x03,0x7d,0x81,0x77,0x39,0x05] + +0xea,0xfe,0x03,0x7d,0xff,0x00,0x00,0x00 +# GFX12-REAL16: v_cmpx_lt_f16 v127.h, v127.h 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x03,0x7d,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_cmpx_lt_f16 v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/, v255/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0x03,0x7d,0xff,0x00,0x00,0x00] 0xe9,0x04,0x22,0x7d,0x01,0x77,0x39,0x05 # GFX12: v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x22,0x7d,0x01,0x77,0x39,0x05] From 8ae1cb2bcb55293cce31bb75c38d6b4e8a13cc23 Mon Sep 17 00:00:00 2001 From: Iman Hosseini Date: Fri, 17 Jan 2025 14:40:31 +0000 Subject: [PATCH 15/88] add power function to APInt (#122788) I am trying to calculate power function for APFloat, APInt to constant fold vector reductions: https://github.com/llvm/llvm-project/pull/122450 I need this utility to fold N `mul`s into power. --------- Co-authored-by: ImanHosseini Co-authored-by: Jakub Kuderski --- llvm/include/llvm/ADT/APInt.h | 4 ++ llvm/lib/Support/APInt.cpp | 18 +++++++++ llvm/unittests/ADT/APIntTest.cpp | 67 ++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+) diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index 225390f1af60b..02d58d8c3d31c 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -2263,6 +2263,10 @@ APInt mulhs(const APInt &C1, const APInt &C2); /// Returns the high N bits of the multiplication result. APInt mulhu(const APInt &C1, const APInt &C2); +/// Compute X^N for N>=0. +/// 0^0 is supported and returns 1. +APInt pow(const APInt &X, int64_t N); + /// Compute GCD of two unsigned APInt values. 
/// /// This function returns the greatest common divisor of the two APInt values diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index ea8295f95c751..38cf485733a93 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -3108,3 +3108,21 @@ APInt APIntOps::mulhu(const APInt &C1, const APInt &C2) { APInt C2Ext = C2.zext(FullWidth); return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth()); } + +APInt APIntOps::pow(const APInt &X, int64_t N) { + assert(N >= 0 && "negative exponents not supported."); + APInt Acc = APInt(X.getBitWidth(), 1); + if (N == 0) + return Acc; + APInt Base = X; + int64_t RemainingExponent = N; + while (RemainingExponent > 0) { + while (RemainingExponent % 2 == 0) { + Base *= Base; + RemainingExponent /= 2; + } + --RemainingExponent; + Acc *= Base; + } + return Acc; +}; diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp index 4d5553fcbd1e3..b14366eac2185 100644 --- a/llvm/unittests/ADT/APIntTest.cpp +++ b/llvm/unittests/ADT/APIntTest.cpp @@ -29,6 +29,73 @@ TEST(APIntTest, ValueInit) { EXPECT_TRUE(!Zero.sext(64)); } +// Test that 0^5 == 0 +TEST(APIntTest, PowZeroTo5) { + APInt Zero = APInt::getZero(32); + EXPECT_TRUE(!Zero); + APInt ZeroTo5 = APIntOps::pow(Zero, 5); + EXPECT_TRUE(!ZeroTo5); +} + +// Test that 1^16 == 1 +TEST(APIntTest, PowOneTo16) { + APInt One(32, 1); + APInt OneTo16 = APIntOps::pow(One, 16); + EXPECT_EQ(One, OneTo16); +} + +// Test that 2^10 == 1024 +TEST(APIntTest, PowerTwoTo10) { + APInt Two(32, 2); + APInt TwoTo20 = APIntOps::pow(Two, 10); + APInt V_1024(32, 1024); + EXPECT_EQ(TwoTo20, V_1024); +} + +// Test that 3^3 == 27 +TEST(APIntTest, PowerThreeTo3) { + APInt Three(32, 3); + APInt ThreeTo3 = APIntOps::pow(Three, 3); + APInt V_27(32, 27); + EXPECT_EQ(ThreeTo3, V_27); +} + +// Test that SignedMaxValue^3 == SignedMaxValue +TEST(APIntTest, PowerSignedMaxValue) { + APInt SignedMaxValue = APInt::getSignedMaxValue(32); + APInt MaxTo3 = 
APIntOps::pow(SignedMaxValue, 3); + EXPECT_EQ(MaxTo3, SignedMaxValue); +} + +// Test that MaxValue^3 == MaxValue +TEST(APIntTest, PowerMaxValue) { + APInt MaxValue = APInt::getMaxValue(32); + APInt MaxTo3 = APIntOps::pow(MaxValue, 3); + EXPECT_EQ(MaxValue, MaxTo3); +} + +// Test that SignedMinValue^3 == 0 +TEST(APIntTest, PowerSignedMinValueTo3) { + APInt SignedMinValue = APInt::getSignedMinValue(32); + APInt MinTo3 = APIntOps::pow(SignedMinValue, 3); + EXPECT_TRUE(MinTo3.isZero()); +} + +// Test that SignedMinValue^1 == SignedMinValue +TEST(APIntTest, PowerSignedMinValueTo1) { + APInt SignedMinValue = APInt::getSignedMinValue(32); + APInt MinTo1 = APIntOps::pow(SignedMinValue, 1); + EXPECT_EQ(SignedMinValue, MinTo1); +} + +// Test that 0^0 == 1 +TEST(APIntTest, ZeroToZero) { + APInt Zero = APInt::getZero(32); + APInt One(32, 1); + APInt ZeroToZero = APIntOps::pow(Zero, 0); + EXPECT_EQ(ZeroToZero, One); +} + // Test that APInt shift left works when bitwidth > 64 and shiftamt == 0 TEST(APIntTest, ShiftLeftByZero) { APInt One = APInt::getZero(65) + 1; From 76569025dd8b026b3309dedbcaf877d16eace805 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Jan 2025 14:46:22 +0000 Subject: [PATCH 16/88] [X86] Fold (v4i32 (scalar_to_vector (i32 (anyext (bitcast (f16)))))) -> (v4i32 bitcast (v8f16 scalar_to_vector)) (#123338) This pattern tends to appear during f16 -> f32 promotion Partially addresses the unnecessary XMM->GPR->XMM moves when working with f16 types (#107086) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 11 ++++ llvm/test/CodeGen/X86/bfloat.ll | 49 +++++++-------- .../CodeGen/X86/canonicalize-vars-f16-type.ll | 4 -- llvm/test/CodeGen/X86/cvt16.ll | 6 +- llvm/test/CodeGen/X86/fp-roundeven.ll | 2 - llvm/test/CodeGen/X86/fp16-libcalls.ll | 46 --------------- llvm/test/CodeGen/X86/half-darwin.ll | 3 +- llvm/test/CodeGen/X86/half-fp80-darwin.ll | 3 +- llvm/test/CodeGen/X86/half.ll | 59 ++++++------------- llvm/test/CodeGen/X86/pr114520.ll | 1 -
llvm/test/CodeGen/X86/pr31088.ll | 10 ---- llvm/test/CodeGen/X86/pr86305.ll | 24 ++++---- .../CodeGen/X86/vector-half-conversions.ll | 12 ++-- 13 files changed, 70 insertions(+), 160 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5bebee765833b..84736f18011a9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58574,6 +58574,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, static SDValue combineSCALAR_TO_VECTOR(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { + using namespace SDPatternMatch; EVT VT = N->getValueType(0); SDValue Src = N->getOperand(0); SDLoc DL(N); @@ -58641,6 +58642,16 @@ static SDValue combineSCALAR_TO_VECTOR(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::MOVQ2DQ, DL, VT, SrcOp); } + if (VT == MVT::v4i32) { + SDValue HalfSrc; + // Combine (v4i32 (scalar_to_vector (i32 (anyext (bitcast (f16)))))) + // to remove XMM->GPR->XMM moves. + if (sd_match(Src, m_AnyExt(m_BitCast( + m_AllOf(m_SpecificVT(MVT::f16), m_Value(HalfSrc)))))) + return DAG.getBitcast( + VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, HalfSrc)); + } + // See if we're broadcasting the scalar value, in which case just reuse that. // Ensure the same SDValue from the SDNode use is being used. 
if (VT.getScalarType() == Src.getValueType()) diff --git a/llvm/test/CodeGen/X86/bfloat.ll b/llvm/test/CodeGen/X86/bfloat.ll index a6b3e3fd1fd16..d67cd6b62c2b9 100644 --- a/llvm/test/CodeGen/X86/bfloat.ll +++ b/llvm/test/CodeGen/X86/bfloat.ll @@ -708,10 +708,8 @@ define <2 x bfloat> @pr62997(bfloat %a, bfloat %b) { ; ; BF16-LABEL: pr62997: ; BF16: # %bb.0: -; BF16-NEXT: vpextrw $0, %xmm0, %eax -; BF16-NEXT: vpextrw $0, %xmm1, %ecx -; BF16-NEXT: vmovd %eax, %xmm0 -; BF16-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 +; BF16-NEXT: vpextrw $0, %xmm1, %eax +; BF16-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ; BF16-NEXT: retq ; ; FP16-LABEL: pr62997: @@ -1652,66 +1650,63 @@ define <8 x bfloat> @fptrunc_v8f64(<8 x double> %a) nounwind { ; AVXNC-NEXT: pushq %r12 ; AVXNC-NEXT: pushq %rbx ; AVXNC-NEXT: subq $168, %rsp -; AVXNC-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVXNC-NEXT: vmovups %ymm1, (%rsp) # 32-byte Spill ; AVXNC-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVXNC-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVXNC-NEXT: vzeroupper ; AVXNC-NEXT: callq __truncdfbf2@PLT ; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVXNC-NEXT: # xmm0 = mem[1,0] +; AVXNC-NEXT: callq __truncdfbf2@PLT +; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVXNC-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; AVXNC-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVXNC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVXNC-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] ; AVXNC-NEXT: vzeroupper ; AVXNC-NEXT: callq __truncdfbf2@PLT -; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVXNC-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload -; AVXNC-NEXT: # xmm0 
= mem[1,0] -; AVXNC-NEXT: callq __truncdfbf2@PLT -; AVXNC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill -; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVXNC-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVXNC-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVXNC-NEXT: vzeroupper ; AVXNC-NEXT: callq __truncdfbf2@PLT ; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVXNC-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload ; AVXNC-NEXT: # xmm0 = mem[1,0] ; AVXNC-NEXT: callq __truncdfbf2@PLT ; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVXNC-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVXNC-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVXNC-NEXT: vzeroupper ; AVXNC-NEXT: callq __truncdfbf2@PLT -; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVXNC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; AVXNC-NEXT: # xmm0 = mem[1,0] ; AVXNC-NEXT: callq __truncdfbf2@PLT -; AVXNC-NEXT: vpextrw $0, %xmm0, %eax -; AVXNC-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVXNC-NEXT: vpextrw $0, %xmm0, %ebx +; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; AVXNC-NEXT: vpextrw $0, %xmm0, %ebp ; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; AVXNC-NEXT: vpextrw $0, %xmm0, %r14d ; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; AVXNC-NEXT: vpextrw $0, %xmm0, %r15d -; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 
16-byte Reload ; AVXNC-NEXT: vpextrw $0, %xmm0, %r12d ; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; AVXNC-NEXT: vpextrw $0, %xmm0, %r13d ; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; AVXNC-NEXT: vpextrw $0, %xmm0, %ebx -; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload -; AVXNC-NEXT: # xmm0 = mem[1,0] ; AVXNC-NEXT: callq __truncdfbf2@PLT ; AVXNC-NEXT: vpextrw $0, %xmm0, %eax -; AVXNC-NEXT: vmovd %ebx, %xmm0 -; AVXNC-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; AVXNC-NEXT: vpinsrw $2, %r13d, %xmm0, %xmm0 +; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVXNC-NEXT: vpinsrw $1, %r13d, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ; AVXNC-NEXT: vpinsrw $3, %r12d, %xmm0, %xmm0 ; AVXNC-NEXT: vpinsrw $4, %r15d, %xmm0, %xmm0 ; AVXNC-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 ; AVXNC-NEXT: vpinsrw $6, %ebp, %xmm0, %xmm0 -; AVXNC-NEXT: vpinsrw $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVXNC-NEXT: vpinsrw $7, %ebx, %xmm0, %xmm0 ; AVXNC-NEXT: addq $168, %rsp ; AVXNC-NEXT: popq %rbx ; AVXNC-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll index fdf0bf3f692d6..e911a24d830f7 100644 --- a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll +++ b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll @@ -133,11 +133,7 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind { ; ; AVX512-LABEL: complex_canonicalize_fmul_half: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpextrw $0, %xmm1, %eax -; AVX512-NEXT: vpextrw $0, %xmm0, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm0 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512-NEXT: vmovd %eax, %xmm1 ; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/cvt16.ll b/llvm/test/CodeGen/X86/cvt16.ll index 
c7ef353f7f603..efc457e35e7f3 100644 --- a/llvm/test/CodeGen/X86/cvt16.ll +++ b/llvm/test/CodeGen/X86/cvt16.ll @@ -59,8 +59,7 @@ define float @test2(ptr nocapture %src) nounwind { ; ; F16C-LABEL: test2: ; F16C: # %bb.0: -; F16C-NEXT: movzwl (%rdi), %eax -; F16C-NEXT: vmovd %eax, %xmm0 +; F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: retq ; @@ -119,8 +118,7 @@ define double @test4(ptr nocapture %src) nounwind { ; ; F16C-LABEL: test4: ; F16C: # %bb.0: -; F16C-NEXT: movzwl (%rdi), %eax -; F16C-NEXT: vmovd %eax, %xmm0 +; F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 ; F16C-NEXT: retq diff --git a/llvm/test/CodeGen/X86/fp-roundeven.ll b/llvm/test/CodeGen/X86/fp-roundeven.ll index 8037c783dd8e6..7d1c52cd65451 100644 --- a/llvm/test/CodeGen/X86/fp-roundeven.ll +++ b/llvm/test/CodeGen/X86/fp-roundeven.ll @@ -50,8 +50,6 @@ define half @roundeven_f16(half %h) { ; ; AVX512F-LABEL: roundeven_f16: ; AVX512F: ## %bb.0: ## %entry -; AVX512F-NEXT: vpextrw $0, %xmm0, %eax -; AVX512F-NEXT: vmovd %eax, %xmm0 ; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512F-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/fp16-libcalls.ll b/llvm/test/CodeGen/X86/fp16-libcalls.ll index 1515cd1366bc6..0d8290b120fa4 100644 --- a/llvm/test/CodeGen/X86/fp16-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp16-libcalls.ll @@ -9,8 +9,6 @@ define void @test_half_ceil(half %a0, ptr %p0) nounwind { ; F16C-LABEL: test_half_ceil: ; F16C: # %bb.0: -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -107,8 +105,6 @@ define void @test_half_cos(half %a0, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: 
vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq cosf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -168,8 +164,6 @@ define void @test_half_exp(half %a0, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq expf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -229,8 +223,6 @@ define void @test_half_exp2(half %a0, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq exp2f@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -290,8 +282,6 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq exp10f@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -349,8 +339,6 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind { define void @test_half_fabs(half %a0, ptr %p0) nounwind { ; F16C-LABEL: test_half_fabs: ; F16C: # %bb.0: -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -392,8 +380,6 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind { define void @test_half_floor(half %a0, ptr %p0) nounwind { ; F16C-LABEL: test_half_floor: ; F16C: # %bb.0: -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -447,14 +433,8 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq 
%rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm2, %eax -; F16C-NEXT: vpextrw $0, %xmm1, %ecx -; F16C-NEXT: vpextrw $0, %xmm0, %edx -; F16C-NEXT: vmovd %edx, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; F16C-NEXT: vmovd %ecx, %xmm1 ; F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; F16C-NEXT: vmovd %eax, %xmm2 ; F16C-NEXT: vcvtph2ps %xmm2, %xmm2 ; F16C-NEXT: callq fmaf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -542,8 +522,6 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind { define void @test_half_fneg(half %a0, ptr %p0) nounwind { ; F16C-LABEL: test_half_fneg: ; F16C: # %bb.0: -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -587,8 +565,6 @@ define void @test_half_log(half %a0, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq logf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -648,8 +624,6 @@ define void @test_half_log2(half %a0, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq log2f@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -709,8 +683,6 @@ define void @test_half_log10(half %a0, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq log10f@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -768,8 +740,6 @@ define void @test_half_log10(half %a0, ptr %p0) nounwind { define void @test_half_nearbyint(half %a0, ptr %p0) nounwind { ; F16C-LABEL: test_half_nearbyint: ; F16C: # %bb.0: -; 
F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -823,11 +793,7 @@ define void @test_half_pow(half %a0, half %a1, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm1, %eax -; F16C-NEXT: vpextrw $0, %xmm0, %ecx -; F16C-NEXT: vmovd %ecx, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; F16C-NEXT: vmovd %eax, %xmm1 ; F16C-NEXT: vcvtph2ps %xmm1, %xmm1 ; F16C-NEXT: callq powf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -907,8 +873,6 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rsi, %rbx -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq __powisf2@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -976,8 +940,6 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind { define void @test_half_rint(half %a0, ptr %p0) nounwind { ; F16C-LABEL: test_half_rint: ; F16C: # %bb.0: -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -1031,8 +993,6 @@ define void @test_half_sin(half %a0, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq sinf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -1090,8 +1050,6 @@ define void @test_half_sin(half %a0, ptr %p0) nounwind { define void @test_half_sqrt(half %a0, ptr %p0) nounwind { ; F16C-LABEL: test_half_sqrt: ; F16C: # %bb.0: -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vsqrtss %xmm0, %xmm0, 
%xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -1146,8 +1104,6 @@ define void @test_half_tan(half %a0, ptr %p0) nounwind { ; F16C: # %bb.0: ; F16C-NEXT: pushq %rbx ; F16C-NEXT: movq %rdi, %rbx -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq tanf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -1205,8 +1161,6 @@ define void @test_half_tan(half %a0, ptr %p0) nounwind { define void @test_half_trunc(half %a0, ptr %p0) nounwind { ; F16C-LABEL: test_half_trunc: ; F16C: # %bb.0: -; F16C-NEXT: vpextrw $0, %xmm0, %eax -; F16C-NEXT: vmovd %eax, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/half-darwin.ll b/llvm/test/CodeGen/X86/half-darwin.ll index 1d2f4eb39bbe6..ec099db4e7ca7 100644 --- a/llvm/test/CodeGen/X86/half-darwin.ll +++ b/llvm/test/CodeGen/X86/half-darwin.ll @@ -76,8 +76,7 @@ define float @extendhfsf(ptr %ptr) nounwind { ; ; CHECK-F16C-LABEL: extendhfsf: ; CHECK-F16C: ## %bb.0: -; CHECK-F16C-NEXT: movzwl (%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm0 +; CHECK-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; CHECK-F16C-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/half-fp80-darwin.ll b/llvm/test/CodeGen/X86/half-fp80-darwin.ll index 0ba734e66c7b2..65a26187c5857 100644 --- a/llvm/test/CodeGen/X86/half-fp80-darwin.ll +++ b/llvm/test/CodeGen/X86/half-fp80-darwin.ll @@ -19,8 +19,7 @@ define void @extendhfxf(ptr %outptr, ptr %inptr) nounwind { ; ; CHECK-F16C-LABEL: extendhfxf: ; CHECK-F16C: ## %bb.0: -; CHECK-F16C-NEXT: movzwl (%rsi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm0 +; CHECK-F16C-NEXT: vpinsrw $0, (%rsi), %xmm0, %xmm0 ; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; CHECK-F16C-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-F16C-NEXT: flds -{{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll 
index 033cadae6a1e7..7bac075e48680 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -81,8 +81,7 @@ define float @test_extend32(ptr %addr) #0 { ; ; BWON-F16C-LABEL: test_extend32: ; BWON-F16C: # %bb.0: -; BWON-F16C-NEXT: movzwl (%rdi), %eax -; BWON-F16C-NEXT: vmovd %eax, %xmm0 +; BWON-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: retq ; @@ -113,8 +112,7 @@ define double @test_extend64(ptr %addr) #0 { ; ; BWON-F16C-LABEL: test_extend64: ; BWON-F16C: # %bb.0: -; BWON-F16C-NEXT: movzwl (%rdi), %eax -; BWON-F16C-NEXT: vmovd %eax, %xmm0 +; BWON-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 ; BWON-F16C-NEXT: retq @@ -220,8 +218,7 @@ define i64 @test_fptosi_i64(ptr %p) #0 { ; ; BWON-F16C-LABEL: test_fptosi_i64: ; BWON-F16C: # %bb.0: -; BWON-F16C-NEXT: movzwl (%rdi), %eax -; BWON-F16C-NEXT: vmovd %eax, %xmm0 +; BWON-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax ; BWON-F16C-NEXT: retq @@ -312,8 +309,7 @@ define i64 @test_fptoui_i64(ptr %p) #0 { ; ; BWON-F16C-LABEL: test_fptoui_i64: ; BWON-F16C: # %bb.0: -; BWON-F16C-NEXT: movzwl (%rdi), %eax -; BWON-F16C-NEXT: vmovd %eax, %xmm0 +; BWON-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rcx ; BWON-F16C-NEXT: movq %rcx, %rdx @@ -851,13 +847,12 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 { ; ; BWON-F16C-LABEL: test_sitofp_fadd_i32: ; BWON-F16C: # %bb.0: -; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 -; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; BWON-F16C-NEXT: movzwl (%rsi), %eax -; BWON-F16C-NEXT: vmovd %eax, %xmm1 +; BWON-F16C-NEXT: vpinsrw $0, (%rsi), %xmm0, %xmm0 +; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm1, %xmm1 +; BWON-F16C-NEXT: vcvtps2ph $4, 
%xmm1, %xmm1 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; BWON-F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; BWON-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: retq @@ -916,8 +911,6 @@ define half @PR40273(half) #0 { ; ; BWON-F16C-LABEL: PR40273: ; BWON-F16C: # %bb.0: -; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax -; BWON-F16C-NEXT: vmovd %eax, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: xorl %eax, %eax ; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -969,8 +962,6 @@ define void @brcond(half %0) #0 { ; ; BWON-F16C-LABEL: brcond: ; BWON-F16C: # %bb.0: # %entry -; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax -; BWON-F16C-NEXT: vmovd %eax, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0 @@ -1024,8 +1015,6 @@ define half @test_sqrt(half %0) #0 { ; ; BWON-F16C-LABEL: test_sqrt: ; BWON-F16C: # %bb.0: # %entry -; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax -; BWON-F16C-NEXT: vmovd %eax, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -1136,9 +1125,7 @@ define void @main.45() #0 { ; CHECK-LIBCALL-NEXT: pushq %rbx ; CHECK-LIBCALL-NEXT: pushq %rax ; CHECK-LIBCALL-NEXT: pinsrw $0, (%rax), %xmm0 -; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax -; CHECK-LIBCALL-NEXT: movd %eax, %xmm1 -; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] +; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7] ; CHECK-LIBCALL-NEXT: movq %xmm1, %rbx ; CHECK-LIBCALL-NEXT: movq %rbx, %r14 ; CHECK-LIBCALL-NEXT: shrq $48, %r14 @@ -1167,15 +1154,12 @@ define void @main.45() #0 { ; BWON-F16C-LABEL: main.45: ; BWON-F16C: # %bb.0: # %entry ; BWON-F16C-NEXT: vpinsrw $0, (%rax), %xmm0, %xmm0 -; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax ; BWON-F16C-NEXT: vpshuflw {{.*#+}} 
xmm0 = xmm0[0,0,0,0,4,5,6,7] -; BWON-F16C-NEXT: vmovd %eax, %xmm1 -; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm1 ; BWON-F16C-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; BWON-F16C-NEXT: vcmpunordps %xmm2, %xmm0, %xmm0 -; BWON-F16C-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 -; BWON-F16C-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; BWON-F16C-NEXT: vcmpunordps %xmm2, %xmm1, %xmm1 +; BWON-F16C-NEXT: vpackssdw %xmm1, %xmm1, %xmm1 +; BWON-F16C-NEXT: vpblendvb %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; BWON-F16C-NEXT: vmovq %xmm0, (%rax) ; BWON-F16C-NEXT: retq ; @@ -1185,12 +1169,11 @@ define void @main.45() #0 { ; CHECK-I686-NEXT: pushl %esi ; CHECK-I686-NEXT: subl $20, %esp ; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 -; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax -; CHECK-I686-NEXT: movd %eax, %xmm0 -; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] -; CHECK-I686-NEXT: movd %xmm0, %esi +; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7] +; CHECK-I686-NEXT: movd %xmm1, %esi ; CHECK-I686-NEXT: movl %esi, %edi ; CHECK-I686-NEXT: shrl $16, %edi +; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax ; CHECK-I686-NEXT: movw %ax, (%esp) ; CHECK-I686-NEXT: calll __extendhfsf2 ; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) @@ -1336,13 +1319,9 @@ define half @pr61271(half %0, half %1) #0 { ; ; BWON-F16C-LABEL: pr61271: ; BWON-F16C: # %bb.0: -; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax -; BWON-F16C-NEXT: vpextrw $0, %xmm1, %ecx -; BWON-F16C-NEXT: vmovd %ecx, %xmm0 -; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; BWON-F16C-NEXT: vmovd %eax, %xmm1 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; BWON-F16C-NEXT: vminss %xmm0, %xmm1, %xmm0 +; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; BWON-F16C-NEXT: vminss %xmm1, %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; BWON-F16C-NEXT: vmovd %xmm0, %eax ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 diff --git 
a/llvm/test/CodeGen/X86/pr114520.ll b/llvm/test/CodeGen/X86/pr114520.ll index c557da6b3ab8c..9bd1f49ff67c9 100644 --- a/llvm/test/CodeGen/X86/pr114520.ll +++ b/llvm/test/CodeGen/X86/pr114520.ll @@ -5,7 +5,6 @@ define half @test1(half %x) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vpextrw $0, %xmm0, %eax -; CHECK-NEXT: vmovd %eax, %xmm0 ; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 ; CHECK-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movl $64512, %ecx # imm = 0xFC00 diff --git a/llvm/test/CodeGen/X86/pr31088.ll b/llvm/test/CodeGen/X86/pr31088.ll index ce37622c476db..1c9c8e40c009d 100644 --- a/llvm/test/CodeGen/X86/pr31088.ll +++ b/llvm/test/CodeGen/X86/pr31088.ll @@ -51,17 +51,7 @@ define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind { ; ; F16C-O0-LABEL: ir_fadd_v1f16: ; F16C-O0: # %bb.0: -; F16C-O0-NEXT: vpextrw $0, %xmm1, %eax -; F16C-O0-NEXT: movw %ax, %cx -; F16C-O0-NEXT: # implicit-def: $eax -; F16C-O0-NEXT: movw %cx, %ax -; F16C-O0-NEXT: vmovd %eax, %xmm1 ; F16C-O0-NEXT: vcvtph2ps %xmm1, %xmm1 -; F16C-O0-NEXT: vpextrw $0, %xmm0, %eax -; F16C-O0-NEXT: movw %ax, %cx -; F16C-O0-NEXT: # implicit-def: $eax -; F16C-O0-NEXT: movw %cx, %ax -; F16C-O0-NEXT: vmovd %eax, %xmm0 ; F16C-O0-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-O0-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; F16C-O0-NEXT: vcvtps2ph $4, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/pr86305.ll b/llvm/test/CodeGen/X86/pr86305.ll index 79b42bb2532ca..0d2e1abe8e5fc 100644 --- a/llvm/test/CodeGen/X86/pr86305.ll +++ b/llvm/test/CodeGen/X86/pr86305.ll @@ -28,17 +28,16 @@ define <4 x bfloat> @fptrunc_v4f32(<4 x float> %a) nounwind { ; CHECK-LABEL: fptrunc_v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: subq $72, %rsp +; CHECK-NEXT: subq $64, %rsp ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: callq __truncsfbf2@PLT ; CHECK-NEXT: vmovaps %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = mem[1,0] +; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,1,3,3] ; CHECK-NEXT: callq __truncsfbf2@PLT -; CHECK-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: vpshufd $255, (%rsp), %xmm0 # 16-byte Folded Reload ; CHECK-NEXT: # xmm0 = mem[3,3,3,3] ; CHECK-NEXT: callq __truncsfbf2@PLT @@ -49,24 +48,21 @@ define <4 x bfloat> @fptrunc_v4f32(<4 x float> %a) nounwind { ; CHECK-NEXT: vpextrw $0, %xmm0, %ebp ; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: vpextrw $0, %xmm0, %r14d -; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; CHECK-NEXT: vpextrw $0, %xmm0, %r15d -; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = mem[1,1,3,3] +; CHECK-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = mem[1,0] ; CHECK-NEXT: callq __truncsfbf2@PLT ; CHECK-NEXT: vpextrw $0, %xmm0, %eax -; CHECK-NEXT: vmovd %r15d, %xmm0 -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; CHECK-NEXT: vpinsrw $2, %r14d, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: vpinsrw $1, %r14d, %xmm0, %xmm0 +; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrw $3, %ebp, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrw $4, %ebx, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrw $5, %ebx, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrw $6, %ebx, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrw $7, %ebx, %xmm0, %xmm0 -; CHECK-NEXT: addq $72, %rsp +; CHECK-NEXT: addq $64, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r14 -; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq %b = fptrunc <4 x float> %a to <4 x bfloat> diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll 
b/llvm/test/CodeGen/X86/vector-half-conversions.ll index 54acd012d1fe4..ec916060563a7 100644 --- a/llvm/test/CodeGen/X86/vector-half-conversions.ll +++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll @@ -814,15 +814,13 @@ define float @load_cvt_i16_to_f32(ptr %a0) nounwind { ; ; F16C-LABEL: load_cvt_i16_to_f32: ; F16C: # %bb.0: -; F16C-NEXT: movzwl (%rdi), %eax -; F16C-NEXT: vmovd %eax, %xmm0 +; F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: retq ; ; AVX512-LABEL: load_cvt_i16_to_f32: ; AVX512: # %bb.0: -; AVX512-NEXT: movzwl (%rdi), %eax -; AVX512-NEXT: vmovd %eax, %xmm0 +; AVX512-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load i16, ptr %a0 @@ -1830,16 +1828,14 @@ define double @load_cvt_i16_to_f64(ptr %a0) nounwind { ; ; F16C-LABEL: load_cvt_i16_to_f64: ; F16C: # %bb.0: -; F16C-NEXT: movzwl (%rdi), %eax -; F16C-NEXT: vmovd %eax, %xmm0 +; F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 ; F16C-NEXT: retq ; ; AVX512-LABEL: load_cvt_i16_to_f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movzwl (%rdi), %eax -; AVX512-NEXT: vmovd %eax, %xmm0 +; AVX512-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: retq From a4d45fe8462bf7042bac2edfb87e3e41e4156ba4 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 17 Jan 2025 14:56:39 +0000 Subject: [PATCH 17/88] [lldb][DWARF] Change GetAttributes parameter from SmallVector to SmallVectorImpl Fixes the lldb-arm-ubuntu buildbot failure: ``` ../llvm-project/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp:374:26: error: non-const lvalue reference to type 'SmallVector<[...], (default) CalculateSmallVectorDefaultInlinedElements::value aka 6>' cannot bind to a value of unrelated type 'SmallVector<[...], 3>' 374 | if (!::GetAttributes(worklist, seen, attributes)) { 
| ^~~~~~~~ ../llvm-project/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp:288:56: note: passing argument to parameter 'worklist' here 288 | static bool GetAttributes(llvm::SmallVector<DWARFDIE> &worklist, | ^ 1 error generated. ``` --- lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index c2edc52aa964f..aafdd2ec68309 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -285,7 +285,7 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges( /// Adds all attributes of the DIE at the top of the \c worklist to the /// \c attributes list. Specifcations and abstract origins are added /// to the \c worklist if the referenced DIE has not been seen before. -static bool GetAttributes(llvm::SmallVector<DWARFDIE> &worklist, +static bool GetAttributes(llvm::SmallVectorImpl<DWARFDIE> &worklist, llvm::SmallSet<DWARFDIE, 3> &seen, DWARFAttributes &attributes) { assert(!worklist.empty() && "Need at least one DIE to visit."); From edf3a55bcecc8b0441a7a5fe6bda2023f86667a3 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Fri, 17 Jan 2025 15:06:18 +0000 Subject: [PATCH 18/88] [LoopVectorize][NFC] Centralize the setting of CostKind (#121937) In each class which calculates instruction costs (VPCostContext, LoopVectorizationCostModel, GeneratedRTChecks) set the CostKind once in the constructor instead of in each function that calculates a cost. This is in preparation for potentially changing the CostKind when compiling for optsize.
--- .../Transforms/Vectorize/LoopVectorize.cpp | 120 ++++++++++-------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlan.h | 6 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 87 ++++++------- 4 files changed, 108 insertions(+), 107 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6df11abda9e98..d79d9e8445b3d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -987,7 +987,7 @@ class LoopVectorizationCostModel { InterleavedAccessInfo &IAI) : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F), - Hints(Hints), InterleaveInfo(IAI) {} + Hints(Hints), InterleaveInfo(IAI), CostKind(TTI::TCK_RecipThroughput) {} /// \return An upper bound for the vectorization factors (both fixed and /// scalable). If the factors are 0, vectorization and interleaving should be @@ -1555,9 +1555,9 @@ class LoopVectorizationCostModel { /// Return the cost of instructions in an inloop reduction pattern, if I is /// part of that pattern. - std::optional - getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy, - TTI::TargetCostKind CostKind) const; + std::optional getReductionPatternCost(Instruction *I, + ElementCount VF, + Type *VectorTy) const; /// Returns true if \p Op should be considered invariant and if it is /// trivially hoistable. @@ -1616,8 +1616,8 @@ class LoopVectorizationCostModel { /// Estimate the overhead of scalarizing an instruction. This is a /// convenience wrapper for the type-based getScalarizationOverhead API. 
- InstructionCost getScalarizationOverhead(Instruction *I, ElementCount VF, - TTI::TargetCostKind CostKind) const; + InstructionCost getScalarizationOverhead(Instruction *I, + ElementCount VF) const; /// Returns true if an artificially high cost for emulated masked memrefs /// should be used. @@ -1798,6 +1798,9 @@ class LoopVectorizationCostModel { /// All element types found in the loop. SmallPtrSet ElementTypesInLoop; + + /// The kind of cost that we are calculating + TTI::TargetCostKind CostKind; }; } // end namespace llvm @@ -1838,13 +1841,17 @@ class GeneratedRTChecks { PredicatedScalarEvolution &PSE; + /// The kind of cost that we are calculating + TTI::TargetCostKind CostKind; + public: GeneratedRTChecks(PredicatedScalarEvolution &PSE, DominatorTree *DT, LoopInfo *LI, TargetTransformInfo *TTI, - const DataLayout &DL, bool AddBranchWeights) + const DataLayout &DL, bool AddBranchWeights, + TTI::TargetCostKind CostKind) : DT(DT), LI(LI), TTI(TTI), SCEVExp(*PSE.getSE(), DL, "scev.check"), MemCheckExp(*PSE.getSE(), DL, "scev.check"), - AddBranchWeights(AddBranchWeights), PSE(PSE) {} + AddBranchWeights(AddBranchWeights), PSE(PSE), CostKind(CostKind) {} /// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can /// accurately estimate the cost of the runtime checks. 
The blocks are @@ -1956,8 +1963,7 @@ class GeneratedRTChecks { for (Instruction &I : *SCEVCheckBlock) { if (SCEVCheckBlock->getTerminator() == &I) continue; - InstructionCost C = - TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput); + InstructionCost C = TTI->getInstructionCost(&I, CostKind); LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n"); RTCheckCost += C; } @@ -1966,8 +1972,7 @@ class GeneratedRTChecks { for (Instruction &I : *MemCheckBlock) { if (MemCheckBlock->getTerminator() == &I) continue; - InstructionCost C = - TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput); + InstructionCost C = TTI->getInstructionCost(&I, CostKind); LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n"); MemCheckCost += C; } @@ -2928,10 +2933,9 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, if (!VF.isScalar()) return CallWideningDecisions.at(std::make_pair(CI, VF)).Cost; - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; Type *RetTy = CI->getType(); if (RecurrenceDescriptor::isFMulAddIntrinsic(CI)) - if (auto RedCost = getReductionPatternCost(CI, VF, RetTy, CostKind)) + if (auto RedCost = getReductionPatternCost(CI, VF, RetTy)) return *RedCost; SmallVector Tys; @@ -2974,8 +2978,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, IntrinsicCostAttributes CostAttrs(ID, RetTy, Arguments, ParamTys, FMF, dyn_cast(CI)); - return TTI.getIntrinsicInstrCost(CostAttrs, - TargetTransformInfo::TCK_RecipThroughput); + return TTI.getIntrinsicInstrCost(CostAttrs, CostKind); } void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { @@ -3432,8 +3435,6 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I, I->getOpcode() == Instruction::URem); assert(!isSafeToSpeculativelyExecute(I)); - const TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - // Scalarization isn't legal for scalable vector types InstructionCost ScalarizationCost = InstructionCost::getInvalid(); if (!VF.isScalable()) { @@ -3455,7 +3456,7 @@ 
LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I, // The cost of insertelement and extractelement instructions needed for // scalarization. - ScalarizationCost += getScalarizationOverhead(I, VF, CostKind); + ScalarizationCost += getScalarizationOverhead(I, VF); // Scale the cost by the probability of executing the predicated blocks. // This assumes the predicated block for each vector lane is equally @@ -4445,7 +4446,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks( for (const auto &Plan : VPlans) { for (ElementCount VF : Plan->vectorFactors()) { VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), - CM); + CM, CM.CostKind); precomputeCosts(*Plan, VF, CostCtx); auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { @@ -5595,7 +5596,6 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount( // Compute the scalarization overhead of needed insertelement instructions // and phi nodes. - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; if (isScalarWithPredication(I, VF) && !I->getType()->isVoidTy()) { ScalarCost += TTI.getScalarizationOverhead( cast(toVectorTy(I->getType(), VF)), @@ -5742,7 +5742,6 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, // Don't pass *I here, since it is scalar but will actually be part of a // vectorized loop where the user of it is a vectorized instruction. - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; const Align Alignment = getLoadStoreAlignment(I); Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), @@ -5750,7 +5749,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. 
- Cost += getScalarizationOverhead(I, VF, CostKind); + Cost += getScalarizationOverhead(I, VF); // If we have a predicated load/store, it will need extra i1 extracts and // conditional branches, but may not be executed for each vector lane. Scale @@ -5783,7 +5782,6 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, Value *Ptr = getLoadStorePointerOperand(I); unsigned AS = getLoadStoreAddressSpace(I); int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr); - enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) && "Stride should be 1 or -1 for consecutive memory access"); @@ -5814,12 +5812,12 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I, auto *VectorTy = cast(toVectorTy(ValTy, VF)); const Align Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); - enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; if (isa(I)) { return TTI.getAddressComputationCost(ValTy) + TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS, CostKind) + - TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy); + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy, {}, + CostKind); } StoreInst *SI = cast(I); @@ -5842,9 +5840,9 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I, const Value *Ptr = getLoadStorePointerOperand(I); return TTI.getAddressComputationCost(VectorTy) + - TTI.getGatherScatterOpCost( - I->getOpcode(), VectorTy, Ptr, Legal->isMaskRequired(I), Alignment, - TargetTransformInfo::TCK_RecipThroughput, I); + TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, + Legal->isMaskRequired(I), Alignment, + CostKind, I); } InstructionCost @@ -5857,7 +5855,6 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, Type *ValTy = getLoadStoreType(InsertPos); auto *VectorTy = cast(toVectorTy(ValTy, VF)); unsigned AS = getLoadStoreAddressSpace(InsertPos); - enum TTI::TargetCostKind 
CostKind = TTI::TCK_RecipThroughput; unsigned InterleaveFactor = Group->getFactor(); auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor); @@ -5889,9 +5886,9 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, } std::optional -LoopVectorizationCostModel::getReductionPatternCost( - Instruction *I, ElementCount VF, Type *Ty, - TTI::TargetCostKind CostKind) const { +LoopVectorizationCostModel::getReductionPatternCost(Instruction *I, + ElementCount VF, + Type *Ty) const { using namespace llvm::PatternMatch; // Early exit for no inloop reductions if (InLoopReductions.empty() || VF.isScalar() || !isa(Ty)) @@ -6082,14 +6079,15 @@ LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0)); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, - TTI::TCK_RecipThroughput, OpInfo, I); + TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, CostKind, + OpInfo, I); } return getWideningCost(I, VF); } -InstructionCost LoopVectorizationCostModel::getScalarizationOverhead( - Instruction *I, ElementCount VF, TTI::TargetCostKind CostKind) const { +InstructionCost +LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, + ElementCount VF) const { // There is no mechanism yet to create a scalable scalarization loop, // so this is currently Invalid. 
@@ -6332,7 +6330,6 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { InstructionCost ScalarCost = InstructionCost::getInvalid(); InstructionCost VectorCost = InstructionCost::getInvalid(); InstructionCost IntrinsicCost = InstructionCost::getInvalid(); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; Function *ScalarFunc = CI->getCalledFunction(); Type *ScalarRetTy = CI->getType(); SmallVector Tys, ScalarTys; @@ -6348,8 +6345,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { // Compute costs of unpacking argument values for the scalar calls and // packing the return values to a vector. - InstructionCost ScalarizationCost = - getScalarizationOverhead(CI, VF, CostKind); + InstructionCost ScalarizationCost = getScalarizationOverhead(CI, VF); ScalarCost = ScalarCallCost * VF.getKnownMinValue() + ScalarizationCost; // Honor ForcedScalars and UniformAfterVectorization decisions. @@ -6373,7 +6369,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { // An in-loop reduction using an fmuladd intrinsic is a special case; // we don't want the normal cost for that intrinsic. 
if (RecurrenceDescriptor::isFMulAddIntrinsic(CI)) - if (auto RedCost = getReductionPatternCost(CI, VF, RetTy, CostKind)) { + if (auto RedCost = getReductionPatternCost(CI, VF, RetTy)) { setCallWideningDecision(CI, VF, CM_IntrinsicCall, nullptr, getVectorIntrinsicIDForCall(CI, TLI), std::nullopt, *RedCost); @@ -6458,7 +6454,8 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { TargetTransformInfo::SK_Broadcast, VectorType::get(IntegerType::getInt1Ty( VecFunc->getFunctionType()->getContext()), - VF)); + VF), + {}, CostKind); if (TLI && VecFunc && !CI->isNoBuiltin()) VectorCost = @@ -6526,7 +6523,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (canTruncateToMinimalBitwidth(I, VF)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); auto *SE = PSE.getSE(); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; auto HasSingleCopyAfterVectorization = [this](Instruction *I, ElementCount VF) -> bool { @@ -6702,7 +6698,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, InstructionCost MulCost = TTI::TCC_Free; ConstantInt *RHS = dyn_cast(I->getOperand(1)); if (!RHS || RHS->getZExtValue() != 1) - MulCost = TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy); + MulCost = + TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind); // Find the cost of the histogram operation itself. Type *PtrTy = VectorType::get(HGram->Load->getPointerOperandType(), VF); @@ -6713,9 +6710,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, {PtrTy, ScalarTy, MaskTy}); // Add the costs together with the add/sub operation. 
- return TTI.getIntrinsicInstrCost( - ICA, TargetTransformInfo::TCK_RecipThroughput) + - MulCost + TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy); + return TTI.getIntrinsicInstrCost(ICA, CostKind) + MulCost + + TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, CostKind); } [[fallthrough]]; } @@ -6740,7 +6736,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, return 0; // Detect reduction patterns - if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind)) + if (auto RedCost = getReductionPatternCost(I, VF, VectorTy)) return *RedCost; // Certain instructions can be cheaper to vectorize if they have a constant @@ -6905,7 +6901,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, } // Detect reduction patterns - if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind)) + if (auto RedCost = getReductionPatternCost(I, VF, VectorTy)) return *RedCost; Type *SrcScalarTy = I->getOperand(0)->getType(); @@ -6930,7 +6926,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, case Instruction::Call: return getVectorCallCost(cast(I), VF); case Instruction::ExtractValue: - return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput); + return TTI.getInstructionCost(I, CostKind); case Instruction::Alloca: // We cannot easily widen alloca to a scalable alloca, as // the result would need to be a vector of pointers. @@ -7442,8 +7438,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, // Pre-compute the cost for I, if it has a reduction pattern cost. 
for (Instruction *I : ChainOpsAndOperands) { - auto ReductionCost = CM.getReductionPatternCost( - I, VF, toVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput); + auto ReductionCost = + CM.getReductionPatternCost(I, VF, toVectorTy(I->getType(), VF)); if (!ReductionCost) continue; @@ -7501,7 +7497,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, ElementCount VF) const { - VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM); + VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM, + CM.CostKind); InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx); // Now compute and add the VPlan-based cost. @@ -7581,6 +7578,16 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1) return {*FirstPlan.vectorFactors().begin(), 0, 0}; + LLVM_DEBUG(dbgs() << "LV: Computing best VF using cost kind: " + << (CM.CostKind == TTI::TCK_RecipThroughput + ? "Reciprocal Throughput\n" + : CM.CostKind == TTI::TCK_Latency + ? "Instruction Latency\n" + : CM.CostKind == TTI::TCK_CodeSize ? "Code Size\n" + : CM.CostKind == TTI::TCK_SizeAndLatency + ? "Code Size and Latency\n" + : "Unknown\n")); + ElementCount ScalarVF = ElementCount::getFixed(1); assert(hasPlanWithVF(ScalarVF) && "More than a single plan/VF w/o any plan having scalar VF"); @@ -7634,7 +7641,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { // simplifications not accounted for in the legacy cost model. If that's the // case, don't trigger the assertion, as the extra simplifications may cause a // different VF to be picked by the VPlan-based cost model. 
- VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM); + VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM, + CM.CostKind); precomputeCosts(BestPlan, BestFactor.Width, CostCtx); assert((BestFactor.Width == LegacyVF.Width || planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width), @@ -10155,7 +10163,7 @@ static bool processLoopInVPlanNativePath( bool AddBranchWeights = hasBranchWeightMD(*L->getLoopLatch()->getTerminator()); GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), - AddBranchWeights); + AddBranchWeights, CM.CostKind); InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, VF.Width, 1, LVL, &CM, BFI, PSI, Checks, BestPlan); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" @@ -10692,7 +10700,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { bool AddBranchWeights = hasBranchWeightMD(*L->getLoopLatch()->getTerminator()); GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), - AddBranchWeights); + AddBranchWeights, CM.CostKind); if (LVP.hasPlanWithVF(VF.Width)) { // Select the interleave count. IC = CM.selectInterleaveCount(VF.Width, VF.Cost); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index aa41c41e90c4c..f1228368804be 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -770,7 +770,7 @@ InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) { InstructionCost BackedgeCost = ForceTargetInstructionCost.getNumOccurrences() ? 
InstructionCost(ForceTargetInstructionCost.getNumOccurrences()) - : Ctx.TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput); + : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind); LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF << ": vector loop backedge\n"); Cost += BackedgeCost; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index eceddff6be6ff..784cee6ed4b06 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -686,11 +686,13 @@ struct VPCostContext { LLVMContext &LLVMCtx; LoopVectorizationCostModel &CM; SmallPtrSet SkipCostComputation; + TargetTransformInfo::TargetCostKind CostKind; VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, - Type *CanIVTy, LoopVectorizationCostModel &CM) + Type *CanIVTy, LoopVectorizationCostModel &CM, + TargetTransformInfo::TargetCostKind CostKind) : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()), - CM(CM) {} + CM(CM), CostKind(CostKind) {} /// Return the cost for \p UI with \p VF using the legacy cost model as /// fallback until computing the cost of all recipes migrates to VPlan. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 5ae2f43e4950c..aa5f92b235555 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -991,10 +991,9 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(), Variant->getFunctionType()->params(), - CostKind); + Ctx.CostKind); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1072,8 +1071,6 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) { InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - // Some backends analyze intrinsic arguments to determine cost. Use the // underlying value for the operand if it has one. Otherwise try to use the // operand of the underlying call instruction, if there is one. Otherwise @@ -1113,7 +1110,7 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF, IntrinsicCostAttributes CostAttrs( VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF, dyn_cast_or_null(getUnderlyingValue())); - return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, CostKind); + return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind); } StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const { @@ -1196,7 +1193,7 @@ InstructionCost VPHistogramRecipe::computeCost(ElementCount VF, // Assume that a non-constant update value (or a constant != 1) requires // a multiply, and add that into the cost. 
InstructionCost MulCost = - Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy); + Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind); if (IncAmt->isLiveIn()) { ConstantInt *CI = dyn_cast(IncAmt->getLiveInIRValue()); @@ -1212,9 +1209,8 @@ InstructionCost VPHistogramRecipe::computeCost(ElementCount VF, {PtrTy, IncTy, MaskTy}); // Add the costs together with the add/sub operation. - return Ctx.TTI.getIntrinsicInstrCost( - ICA, TargetTransformInfo::TCK_RecipThroughput) + - MulCost + Ctx.TTI.getArithmeticInstrCost(Opcode, VTy); + return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost + + Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1278,7 +1274,6 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF, bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions(); Type *ScalarTy = Ctx.Types.inferScalarType(this); Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; VPValue *Op0, *Op1; using namespace llvm::VPlanPatternMatch; @@ -1296,8 +1291,8 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF, Operands.append(SI->op_begin(), SI->op_end()); bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))); return Ctx.TTI.getArithmeticInstrCost( - IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy, CostKind, - {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI); + IsLogicalOr ? 
Instruction::Or : Instruction::And, VectorTy, + Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI); } Type *CondTy = Ctx.Types.inferScalarType(getOperand(0)); @@ -1307,9 +1302,9 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF, CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; if (auto *Cmp = dyn_cast(SI->getCondition())) Pred = Cmp->getPredicate(); - return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VectorTy, CondTy, Pred, - CostKind, {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_AnyValue, TTI::OP_None}, SI); + return Ctx.TTI.getCmpSelInstrCost( + Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI); } VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy( @@ -1454,12 +1449,11 @@ void VPWidenRecipe::execute(VPTransformState &State) { InstructionCost VPWidenRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; switch (Opcode) { case Instruction::FNeg: { Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); return Ctx.TTI.getArithmeticInstrCost( - Opcode, VectorTy, CostKind, + Opcode, VectorTy, Ctx.CostKind, {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}); } @@ -1502,21 +1496,22 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF, if (CtxI) Operands.append(CtxI->value_op_begin(), CtxI->value_op_end()); return Ctx.TTI.getArithmeticInstrCost( - Opcode, VectorTy, CostKind, + Opcode, VectorTy, Ctx.CostKind, {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, RHSInfo, Operands, CtxI, &Ctx.TLI); } case Instruction::Freeze: { // This opcode is unknown. Assume that it is the same as 'mul'. 
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); - return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind); + return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, + Ctx.CostKind); } case Instruction::ICmp: case Instruction::FCmp: { Instruction *CtxI = dyn_cast_or_null(getUnderlyingValue()); Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(), - CostKind, + Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, CtxI); } @@ -1646,7 +1641,7 @@ InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF, auto *DestTy = cast(toVectorTy(getResultType(), VF)); // Arm TTI will use the underlying instruction to determine the cost. return Ctx.TTI.getCastInstrCost( - Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput, + Opcode, DestTy, SrcTy, CCH, Ctx.CostKind, dyn_cast_if_present(getUnderlyingValue())); } @@ -1664,7 +1659,7 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { - return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput); + return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); } /// This function adds @@ -2143,18 +2138,16 @@ void VPBlendRecipe::execute(VPTransformState &State) { InstructionCost VPBlendRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - // Handle cases where only the first lane is used the same way as the legacy // cost model. 
if (vputils::onlyFirstLaneUsed(this)) - return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind); + return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF); return (getNumIncomingValues() - 1) * Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -2274,7 +2267,6 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF, RecurKind RdxKind = RdxDesc.getRecurrenceKind(); Type *ElementTy = Ctx.Types.inferScalarType(this); auto *VectorTy = cast(toVectorTy(ElementTy, VF)); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; unsigned Opcode = RdxDesc.getOpcode(); // TODO: Support any-of and in-loop reductions. @@ -2292,15 +2284,15 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF, // Cost = Reduction cost + BinOp cost InstructionCost Cost = - Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, CostKind); + Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind); if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) { Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind); return Cost + Ctx.TTI.getMinMaxReductionCost( - Id, VectorTy, RdxDesc.getFastMathFlags(), CostKind); + Id, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind); } return Cost + Ctx.TTI.getArithmeticReductionCost( - Opcode, VectorTy, RdxDesc.getFastMathFlags(), CostKind); + Opcode, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -2531,7 +2523,6 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, getLoadStoreAlignment(const_cast(&Ingredient)); unsigned AS = getLoadStoreAddressSpace(const_cast(&Ingredient)); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; if (!Consecutive) { // TODO: 
Using the original IR may not be accurate. @@ -2542,25 +2533,26 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, "Inconsecutive memory access should not have the order."); return Ctx.TTI.getAddressComputationCost(Ty) + Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr, - IsMasked, Alignment, CostKind, + IsMasked, Alignment, Ctx.CostKind, &Ingredient); } InstructionCost Cost = 0; if (IsMasked) { Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, - AS, CostKind); + AS, Ctx.CostKind); } else { TTI::OperandValueInfo OpInfo = Ctx.TTI.getOperandInfo(Ingredient.getOperand(0)); Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS, - CostKind, OpInfo, &Ingredient); + Ctx.CostKind, OpInfo, &Ingredient); } if (!Reverse) return Cost; - return Cost += Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, - cast(Ty), {}, CostKind, 0); + return Cost += + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, + cast(Ty), {}, Ctx.CostKind, 0); } void VPWidenLoadRecipe::execute(VPTransformState &State) { @@ -2678,14 +2670,14 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF, getLoadStoreAlignment(const_cast(&Ingredient)); unsigned AS = getLoadStoreAddressSpace(const_cast(&Ingredient)); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( - Ingredient.getOpcode(), Ty, Alignment, AS, CostKind); + Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind); if (!Reverse) return Cost; return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, - cast(Ty), {}, CostKind, 0); + cast(Ty), {}, Ctx.CostKind, + 0); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -2799,14 +2791,14 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF, getLoadStoreAlignment(const_cast(&Ingredient)); unsigned AS = getLoadStoreAddressSpace(const_cast(&Ingredient)); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; 
InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( - Ingredient.getOpcode(), Ty, Alignment, AS, CostKind); + Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind); if (!Reverse) return Cost; return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, - cast(Ty), {}, CostKind, 0); + cast(Ty), {}, Ctx.CostKind, + 0); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -3197,7 +3189,6 @@ InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF, : getStoredValues()[InsertPosIdx]); auto *VectorTy = cast(toVectorTy(ValTy, VF)); unsigned AS = getLoadStoreAddressSpace(InsertPos); - enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; unsigned InterleaveFactor = IG->getFactor(); auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor); @@ -3211,14 +3202,15 @@ InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF, // Calculate the cost of the whole interleaved group. InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost( InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices, - IG->getAlign(), AS, CostKind, getMask(), NeedsMaskForGaps); + IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps); if (!IG->isReverse()) return Cost; return Cost + IG->getNumMembers() * Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, - VectorTy, std::nullopt, CostKind, 0); + VectorTy, std::nullopt, Ctx.CostKind, + 0); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -3428,9 +3420,8 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { InstructionCost VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; if (VF.isScalar()) - return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind); + return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); if (VF.isScalable() && VF.getKnownMinValue() == 1) return InstructionCost::getInvalid(); @@ -3441,7 +3432,7 @@ 
VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF, toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF); return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice, - cast(VectorTy), Mask, CostKind, + cast(VectorTy), Mask, Ctx.CostKind, VF.getKnownMinValue() - 1); } From 2f853d851bb0eb4ba3d827909300839037d4b8fe Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Jan 2025 14:52:42 +0000 Subject: [PATCH 19/88] [X86] Regenerate VMOVSH assembly comments. NFC. --- llvm/test/CodeGen/X86/avx512fp16-arith.ll | 4 ++-- llvm/test/CodeGen/X86/avx512fp16-cvt.ll | 24 +++++++++---------- .../CodeGen/X86/avx512fp16-unsafe-fp-math.ll | 4 ++-- llvm/test/CodeGen/X86/cvt16-2.ll | 6 ++--- .../test/CodeGen/X86/fp-strict-scalar-fp16.ll | 24 +++++++++---------- .../X86/fp-strict-scalar-round-fp16.ll | 2 +- llvm/test/CodeGen/X86/half-darwin.ll | 4 ++-- 7 files changed, 34 insertions(+), 34 deletions(-) diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll index 8d811d8d29e06..9838c6c858bd6 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll @@ -179,7 +179,7 @@ define half @add_sh(half %i, half %j, ptr %x.ptr) nounwind readnone { define half @sub_sh(half %i, half %j, ptr %x.ptr) nounwind readnone { ; CHECK-LABEL: sub_sh: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovsh (%rdi), %xmm2 +; CHECK-NEXT: vmovsh {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vsubsh %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -216,7 +216,7 @@ define half @mul_sh(half %i, half %j, ptr %x.ptr) nounwind readnone { define half @div_sh(half %i, half %j, ptr %x.ptr) nounwind readnone { ; CHECK-LABEL: div_sh: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovsh (%rdi), %xmm2 +; CHECK-NEXT: vmovsh {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vdivsh %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: 
retq diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll index e1e013528738a..3040e58b37997 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll @@ -144,7 +144,7 @@ define float @f16tof32(half %b) nounwind { ; X86-LABEL: f16tof32: ; X86: # %bb.0: ; X86-NEXT: pushl %eax -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -166,7 +166,7 @@ define double @f16tof64(half %b) nounwind { ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: vmovsh 8(%ebp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) @@ -356,7 +356,7 @@ define <8 x half> @f64to8f16(<8 x double> %b) { define float @extload_f16_f32(ptr %x) { ; X64-LABEL: extload_f16_f32: ; X64: # %bb.0: -; X64-NEXT: vmovsh (%rdi), %xmm0 +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X64-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 ; X64-NEXT: retq ; @@ -365,7 +365,7 @@ define float @extload_f16_f32(ptr %x) { ; X86-NEXT: pushl %eax ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: vmovsh (%eax), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) @@ -380,7 +380,7 @@ define float @extload_f16_f32(ptr %x) { define double @extload_f16_f64(ptr %x) { ; X64-LABEL: extload_f16_f64: ; X64: # %bb.0: -; X64-NEXT: vmovsh (%rdi), %xmm0 +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X64-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0 ; X64-NEXT: retq ; @@ -394,7 +394,7 @@ define double 
@extload_f16_f64(ptr %x) { ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: vmovsh (%eax), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) @@ -777,7 +777,7 @@ define i64 @half_to_s64(half %x) { ; ; X86-LABEL: half_to_s64: ; X86: # %bb.0: -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vcvttph2qq %xmm0, %xmm0 ; X86-NEXT: vmovd %xmm0, %eax ; X86-NEXT: vpextrd $1, %xmm0, %edx @@ -808,7 +808,7 @@ define i128 @half_to_s128(half %x) { ; X86-NEXT: subl $48, %esp ; X86-NEXT: .cfi_offset %esi, -12 ; X86-NEXT: movl 8(%ebp), %esi -; X86-NEXT: vmovsh 12(%ebp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vmovsh %xmm0, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, (%esp) @@ -880,7 +880,7 @@ define i64 @half_to_u64(half %x) { ; ; X86-LABEL: half_to_u64: ; X86: # %bb.0: -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vcvttph2uqq %xmm0, %xmm0 ; X86-NEXT: vmovd %xmm0, %eax ; X86-NEXT: vpextrd $1, %xmm0, %edx @@ -911,7 +911,7 @@ define i128 @half_to_u128(half %x) { ; X86-NEXT: subl $48, %esp ; X86-NEXT: .cfi_offset %esi, -12 ; X86-NEXT: movl 8(%ebp), %esi -; X86-NEXT: vmovsh 12(%ebp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vmovsh %xmm0, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, (%esp) @@ -940,7 +940,7 @@ define x86_fp80 @half_to_f80(half %x) nounwind { ; X86-LABEL: half_to_f80: ; X86: # %bb.0: ; X86-NEXT: pushl %eax -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vmovsh 
%xmm0, (%esp) ; X86-NEXT: calll __extendhfxf2 ; X86-NEXT: popl %eax @@ -990,7 +990,7 @@ define fp128 @half_to_f128(half %x) nounwind { ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $48, %esp ; X86-NEXT: movl 8(%ebp), %esi -; X86-NEXT: vmovsh 12(%ebp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax diff --git a/llvm/test/CodeGen/X86/avx512fp16-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/avx512fp16-unsafe-fp-math.ll index c9b45983e09a8..5b92ce76d5736 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-unsafe-fp-math.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-unsafe-fp-math.ll @@ -112,7 +112,7 @@ define half @test_max_f16(half %a, ptr %ptr) { ; ; CHECK-LABEL: test_max_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmovsh (%rdi), %xmm1 +; CHECK-NEXT: vmovsh {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vmaxsh %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq entry: @@ -130,7 +130,7 @@ define half @test_min_f16(half %a, ptr %ptr) { ; ; CHECK-LABEL: test_min_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmovsh (%rdi), %xmm1 +; CHECK-NEXT: vmovsh {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vminsh %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/cvt16-2.ll b/llvm/test/CodeGen/X86/cvt16-2.ll index bab6768b16322..8dbbc57f10564 100644 --- a/llvm/test/CodeGen/X86/cvt16-2.ll +++ b/llvm/test/CodeGen/X86/cvt16-2.ll @@ -34,7 +34,7 @@ define float @test2(ptr nocapture %src) { ; ; FP16-LABEL: test2: ; FP16: # %bb.0: -; FP16-NEXT: vmovsh (%rdi), %xmm0 +; FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 ; FP16-NEXT: retq %1 = load i16, ptr %src, align 2 @@ -77,7 +77,7 @@ define double @test4(ptr nocapture %src) { ; ; FP16-LABEL: test4: ; FP16: # %bb.0: -; FP16-NEXT: vmovsh (%rdi), %xmm0 +; 
FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; FP16-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0 ; FP16-NEXT: retq %1 = load i16, ptr %src, align 2 @@ -123,7 +123,7 @@ define x86_fp80 @test6(ptr nocapture %src) { ; FP16: # %bb.0: ; FP16-NEXT: pushq %rax ; FP16-NEXT: .cfi_def_cfa_offset 16 -; FP16-NEXT: vmovsh (%rdi), %xmm0 +; FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; FP16-NEXT: callq __extendhfxf2@PLT ; FP16-NEXT: popq %rax ; FP16-NEXT: .cfi_def_cfa_offset 8 diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll index 3ecddd5279814..bf93c8a1f5b51 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll @@ -52,7 +52,7 @@ define half @fadd_f16(half %a, half %b) nounwind strictfp { ; ; X86-LABEL: fadd_f16: ; X86: # %bb.0: -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vaddsh {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X86-NEXT: retl ; @@ -102,7 +102,7 @@ define half @fsub_f16(half %a, half %b) nounwind strictfp { ; ; X86-LABEL: fsub_f16: ; X86: # %bb.0: -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vsubsh {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X86-NEXT: retl ; @@ -152,7 +152,7 @@ define half @fmul_f16(half %a, half %b) nounwind strictfp { ; ; X86-LABEL: fmul_f16: ; X86: # %bb.0: -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vmulsh {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X86-NEXT: retl ; @@ -202,7 +202,7 @@ define half @fdiv_f16(half %a, half %b) nounwind strictfp { ; ; X86-LABEL: fdiv_f16: ; X86: # %bb.0: -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vdivsh {{[0-9]+}}(%esp), %xmm0, 
%xmm0 ; X86-NEXT: retl ; @@ -239,14 +239,14 @@ define void @fpext_f16_to_f32(ptr %val, ptr %ret) nounwind strictfp { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: vmovsh (%ecx), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: fpext_f16_to_f32: ; X64: # %bb.0: -; X64-NEXT: vmovsh (%rdi), %xmm0 +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X64-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 ; X64-NEXT: vmovss %xmm0, (%rsi) ; X64-NEXT: retq @@ -282,14 +282,14 @@ define void @fpext_f16_to_f64(ptr %val, ptr %ret) nounwind strictfp { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: vmovsh (%ecx), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: fpext_f16_to_f64: ; X64: # %bb.0: -; X64-NEXT: vmovsh (%rdi), %xmm0 +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X64-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0 ; X64-NEXT: vmovsd %xmm0, (%rsi) ; X64-NEXT: retq @@ -418,14 +418,14 @@ define void @fsqrt_f16(ptr %a) nounwind strictfp { ; X86-LABEL: fsqrt_f16: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: vmovsh (%eax), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vsqrtsh %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsh %xmm0, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: fsqrt_f16: ; X64: # %bb.0: -; X64-NEXT: vmovsh (%rdi), %xmm0 +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X64-NEXT: vsqrtsh %xmm0, %xmm0, %xmm0 ; X64-NEXT: vmovsh %xmm0, (%rdi) ; X64-NEXT: retq @@ -510,8 +510,8 @@ define half @fma_f16(half %a, half %b, half %c) nounwind strictfp 
{ ; ; X86-LABEL: fma_f16: ; X86: # %bb.0: -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm1 -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vfmadd213sh {{[0-9]+}}(%esp), %xmm1, %xmm0 ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-round-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-round-fp16.ll index 3b9798a2af582..6fe5dcd292930 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-round-fp16.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-round-fp16.ll @@ -273,7 +273,7 @@ define half @fround16(half %f) #0 { ; X86-LABEL: fround16: ; X86: # %bb.0: ; X86-NEXT: subl $8, %esp -; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: calll roundf diff --git a/llvm/test/CodeGen/X86/half-darwin.ll b/llvm/test/CodeGen/X86/half-darwin.ll index ec099db4e7ca7..7388429143df5 100644 --- a/llvm/test/CodeGen/X86/half-darwin.ll +++ b/llvm/test/CodeGen/X86/half-darwin.ll @@ -82,7 +82,7 @@ define float @extendhfsf(ptr %ptr) nounwind { ; ; CHECK-FP16-LABEL: extendhfsf: ; CHECK-FP16: ## %bb.0: -; CHECK-FP16-NEXT: vmovsh (%rdi), %xmm0 +; CHECK-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; CHECK-FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 ; CHECK-FP16-NEXT: retq @@ -174,7 +174,7 @@ define float @strict_extendhfsf(ptr %ptr) nounwind strictfp { ; ; CHECK-FP16-LABEL: strict_extendhfsf: ; CHECK-FP16: ## %bb.0: -; CHECK-FP16-NEXT: vmovsh (%rdi), %xmm0 +; CHECK-FP16-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero ; CHECK-FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0 ; CHECK-FP16-NEXT: retq From 123b24ff97402cc94dc74610330193244910a81f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Jan 2025 15:05:01 +0000 
Subject: [PATCH 20/88] [X86] avx512fp16-arith.ll - regenerate VPTERNLOG comments --- llvm/test/CodeGen/X86/avx512fp16-arith.ll | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll index 9838c6c858bd6..b264f5fc34688 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll @@ -329,7 +329,7 @@ define half @fcopysign(half %x, half %y) { ; CHECK-LABEL: fcopysign: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] -; CHECK-NEXT: vpternlogd $226, %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm2 & (xmm0 ^ xmm1)) ; CHECK-NEXT: retq %a = call half @llvm.copysign.f16(half %x, half %y) ret half %a @@ -341,7 +341,7 @@ define half @fround(half %x) { ; CHECK: ## %bb.0: ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1] -; CHECK-NEXT: vpternlogq $248, %xmm1, %xmm0, %xmm2 +; CHECK-NEXT: vpternlogq {{.*#+}} xmm2 = xmm2 | (xmm0 & xmm1) ; CHECK-NEXT: vaddsh %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vrndscalesh $11, %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq @@ -384,7 +384,7 @@ declare <8 x half> @llvm.fabs.v8f16(<8 x half>) define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) { ; CHECK-LABEL: fcopysignv8f16: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0 +; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (mem & (xmm0 ^ xmm1)) ; CHECK-NEXT: retq %a = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y) ret <8 x half> %a @@ -396,7 +396,7 @@ define <8 x half> @roundv8f16(<8 x half> %x) { ; CHECK: ## %bb.0: ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; 
CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1] -; CHECK-NEXT: vpternlogq $248, %xmm1, %xmm0, %xmm2 +; CHECK-NEXT: vpternlogq {{.*#+}} xmm2 = xmm2 | (xmm0 & xmm1) ; CHECK-NEXT: vaddph %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vrndscaleph $11, %xmm0, %xmm0 ; CHECK-NEXT: retq @@ -439,7 +439,7 @@ declare <16 x half> @llvm.fabs.v16f16(<16 x half>) define <16 x half> @fcopysignv16f16(<16 x half> %x, <16 x half> %y) { ; CHECK-LABEL: fcopysignv16f16: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0 +; CHECK-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1)) ; CHECK-NEXT: retq %a = call <16 x half> @llvm.copysign.v16f16(<16 x half> %x, <16 x half> %y) ret <16 x half> %a @@ -451,7 +451,7 @@ define <16 x half> @roundv16f16(<16 x half> %x) { ; CHECK: ## %bb.0: ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1] -; CHECK-NEXT: vpternlogq $248, %ymm1, %ymm0, %ymm2 +; CHECK-NEXT: vpternlogq {{.*#+}} ymm2 = ymm2 | (ymm0 & ymm1) ; CHECK-NEXT: vaddph %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vrndscaleph $11, %ymm0, %ymm0 ; CHECK-NEXT: retq @@ -494,7 +494,7 @@ declare <32 x half> @llvm.fabs.v32f16(<32 x half>) define <32 x half> @fcopysignv32f16(<32 x half> %x, <32 x half> %y) { ; CHECK-LABEL: fcopysignv32f16: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0 +; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (mem & (zmm0 ^ zmm1)) ; CHECK-NEXT: retq %a = call <32 x half> @llvm.copysign.v32f16(<32 x half> %x, <32 x half> %y) ret <32 x half> %a @@ -506,7 +506,7 @@ define <32 x half> 
@roundv32f16(<32 x half> %x) { ; CHECK: ## %bb.0: ; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1] -; CHECK-NEXT: vpternlogq $248, %zmm1, %zmm0, %zmm2 +; CHECK-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 | (zmm0 & zmm1) ; CHECK-NEXT: vaddph %zmm2, %zmm0, %zmm0 ; CHECK-NEXT: vrndscaleph $11, %zmm0, %zmm0 ; CHECK-NEXT: retq From 8abbd76cfb14ae4a4cb020dd3eb761ddd1db14d1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Jan 2025 15:07:45 +0000 Subject: [PATCH 21/88] [X86] Regenerate VFPCLASS assembly comments. NFC. 
--- llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll | 8 ++++---- llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll | 10 +++++----- llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll index 9db57fe68bb42..3ea79c856e1ca 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll @@ -41,7 +41,7 @@ define <8 x half> @test_fminimum_v8f16(<8 x half> %x, <8 x half> %y) "no-nans-fp define half @test_fminimum_nnan(half %x, half %y) "no-nans-fp-math"="true" { ; CHECK-LABEL: test_fminimum_nnan: ; CHECK: # %bb.0: -; CHECK-NEXT: vfpclasssh $5, %xmm1, %k1 +; CHECK-NEXT: vfpclasssh $5, %xmm1, %k1 # k1 = isQuietNaN(xmm1) | isNegativeZero(xmm1) ; CHECK-NEXT: vmovaps %xmm0, %xmm2 ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1} @@ -78,7 +78,7 @@ define half @test_fminimum_combine_cmps(half %x, half %y) { ; CHECK-LABEL: test_fminimum_combine_cmps: ; CHECK: # %bb.0: ; CHECK-NEXT: vdivsh %xmm0, %xmm1, %xmm1 -; CHECK-NEXT: vfpclasssh $5, %xmm0, %k1 +; CHECK-NEXT: vfpclasssh $5, %xmm0, %k1 # k1 = isQuietNaN(xmm0) | isNegativeZero(xmm0) ; CHECK-NEXT: vmovaps %xmm1, %xmm2 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1} @@ -121,7 +121,7 @@ define half @test_fmaximum_nnan(half %x, half %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm2 ; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vfpclasssh $3, %xmm0, %k1 +; CHECK-NEXT: vfpclasssh $3, %xmm0, %k1 # k1 = isQuietNaN(xmm0) | isPositiveZero(xmm0) ; CHECK-NEXT: vmovaps %xmm2, %xmm1 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovsh %xmm2, %xmm0, %xmm0 {%k1} @@ -161,7 +161,7 @@ define half @test_fmaximum_combine_cmps(half %x, half %y) { ; CHECK-LABEL: 
test_fmaximum_combine_cmps: ; CHECK: # %bb.0: ; CHECK-NEXT: vdivsh %xmm0, %xmm1, %xmm1 -; CHECK-NEXT: vfpclasssh $3, %xmm0, %k1 +; CHECK-NEXT: vfpclasssh $3, %xmm0, %k1 # k1 = isQuietNaN(xmm0) | isPositiveZero(xmm0) ; CHECK-NEXT: vmovaps %xmm1, %xmm2 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1} diff --git a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll index 40578fe746edb..85e1890c2b79a 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll @@ -244,8 +244,8 @@ declare <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half>, i32) define i32 @test_int_x86_avx512_fpclass_ph_512(<32 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_512: ; CHECK: # %bb.0: -; CHECK-NEXT: vfpclassph $2, %zmm0, %k1 -; CHECK-NEXT: vfpclassph $4, %zmm0, %k0 {%k1} +; CHECK-NEXT: vfpclassph $2, %zmm0, %k1 # k1 = isPositiveZero(zmm0) +; CHECK-NEXT: vfpclassph $4, %zmm0, %k0 {%k1} # k0 {%k1} = isNegativeZero(zmm0) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -261,8 +261,8 @@ declare i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half>, i32, i8) define i8 @test_int_x86_avx512_mask_fpclass_sh(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh: ; CHECK: # %bb.0: -; CHECK-NEXT: vfpclasssh $4, %xmm0, %k1 -; CHECK-NEXT: vfpclasssh $2, %xmm0, %k0 {%k1} +; CHECK-NEXT: vfpclasssh $4, %xmm0, %k1 # k1 = isNegativeZero(xmm0) +; CHECK-NEXT: vfpclasssh $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq @@ -274,7 +274,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_sh(<8 x half> %x0) { define i8 @test_int_x86_avx512_mask_fpclass_sh_load(ptr %x0ptr) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh_load: ; CHECK: # %bb.0: -; CHECK-NEXT: vfpclasssh $4, (%rdi), %k0 +; CHECK-NEXT: vfpclasssh 
$4, (%rdi), %k0 # k0 = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll index a0fc8180e10b9..47bfea91f58dd 100644 --- a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll @@ -1131,7 +1131,7 @@ declare <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half>, i32) define i8 @test_int_x86_avx512_fpclass_ph_128(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_128: ; CHECK: # %bb.0: -; CHECK-NEXT: vfpclassph $2, %xmm0, %k1 +; CHECK-NEXT: vfpclassph $2, %xmm0, %k1 # k1 = isPositiveZero(xmm0) ; CHECK-NEXT: vfpclassph $4, %xmm0, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -1146,7 +1146,7 @@ define i8 @test_int_x86_avx512_fpclass_ph_128(<8 x half> %x0) { define i16 @test_int_x86_avx512_fpclass_ph_256(<16 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_256: ; CHECK: # %bb.0: -; CHECK-NEXT: vfpclassph $2, %ymm0, %k1 +; CHECK-NEXT: vfpclassph $2, %ymm0, %k1 # k1 = isPositiveZero(ymm0) ; CHECK-NEXT: vfpclassph $4, %ymm0, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax From 0417cd1b3e66c06966a3685f143df9228e2444b1 Mon Sep 17 00:00:00 2001 From: Doug Wyatt Date: Fri, 17 Jan 2025 07:11:36 -0800 Subject: [PATCH 22/88] [Clang] FunctionEffects: Correctly navigate through array types in FunctionEffectsRef::get(). (#121525) `FunctionEffectsRef::get()` is supposed to strip off layers of indirection (pointers/references, type sugar) to get to a `FunctionProtoType` (if any) and return its effects (if any). It wasn't correctly dealing with situations where the compiler implicitly converts an array to a pointer. 
--------- Co-authored-by: Doug Wyatt Co-authored-by: Sirraide --- clang/include/clang/AST/Type.h | 11 +++++++---- .../Sema/attr-nonblocking-constraints.cpp | 19 ++++++++++++++++++- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index f0fbacccc97bb..3457d524c63aa 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -8841,13 +8841,16 @@ void FixedPointValueToString(SmallVectorImpl &Str, llvm::APSInt Val, unsigned Scale); inline FunctionEffectsRef FunctionEffectsRef::get(QualType QT) { + const Type *TypePtr = QT.getTypePtr(); while (true) { - QualType Pointee = QT->getPointeeType(); - if (Pointee.isNull()) + if (QualType Pointee = TypePtr->getPointeeType(); !Pointee.isNull()) + TypePtr = Pointee.getTypePtr(); + else if (TypePtr->isArrayType()) + TypePtr = TypePtr->getBaseElementTypeUnsafe(); + else break; - QT = Pointee; } - if (const auto *FPT = QT->getAs()) + if (const auto *FPT = TypePtr->getAs()) return FPT->getFunctionEffects(); return {}; } diff --git a/clang/test/Sema/attr-nonblocking-constraints.cpp b/clang/test/Sema/attr-nonblocking-constraints.cpp index bbc909f627f4c..b26a945843696 100644 --- a/clang/test/Sema/attr-nonblocking-constraints.cpp +++ b/clang/test/Sema/attr-nonblocking-constraints.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -fblocks -fcxx-exceptions -std=c++20 -verify -Wfunction-effects %s +// RUN: %clang_cc1 -fsyntax-only -fblocks -fcxx-exceptions -std=c++20 -verify -Wfunction-effects -Wno-vla-extension %s // These are in a separate file because errors (e.g. incompatible attributes) currently prevent // the FXAnalysis pass from running at all. @@ -246,6 +246,23 @@ void PTMFTester::convert() [[clang::nonblocking]] (this->*mConvertFunc)(); } +// Allow implicit conversion from array to pointer. 
+void nb14(unsigned idx) [[clang::nonblocking]] +{ + using FP = void (*)() [[clang::nonblocking]]; + using FPArray = FP[2]; + auto nb = +[]() [[clang::nonblocking]] {}; + + FPArray src{ nb, nullptr }; + FP f = src[idx]; // This should not generate a warning. + + FP twoDim[2][2] = {}; + FP g = twoDim[1][1]; + + FP vla[idx]; + FP h = vla[0]; +} + // Block variables void nb17(void (^blk)() [[clang::nonblocking]]) [[clang::nonblocking]] { blk(); From 63b0ab84253f29f1f9b9136a02d589552b29c645 Mon Sep 17 00:00:00 2001 From: Iman Hosseini Date: Fri, 17 Jan 2025 15:11:52 +0000 Subject: [PATCH 23/88] remove extra ; (#123352) Remove erroneous extra semicolon in: https://github.com/llvm/llvm-project/pull/122788 Co-authored-by: ImanHosseini --- llvm/lib/Support/APInt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index 38cf485733a93..4e45416b4598f 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -3125,4 +3125,4 @@ APInt APIntOps::pow(const APInt &X, int64_t N) { Acc *= Base; } return Acc; -}; +} From ba44d7ba1fb3e27f51d65ea1af280e00382e09e0 Mon Sep 17 00:00:00 2001 From: Konrad Kleine Date: Fri, 17 Jan 2025 16:25:08 +0100 Subject: [PATCH 24/88] [MLIR][test] Fixup for checking for ml_dtypes (#123240) In order to optionally run some checks that depend on the `ml_dtypes` python module we have to remove the `CHECK` lines for those tests or they will be required and missed in the test output. I've changed to use asserts as recommended in [1]. 
[1]: https://github.com/llvm/llvm-project/pull/123061#issuecomment-2596116023 --- mlir/test/python/execution_engine.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py index e3f41815800d5..d569fcef32bfd 100644 --- a/mlir/test/python/execution_engine.py +++ b/mlir/test/python/execution_engine.py @@ -566,13 +566,15 @@ def testBF16Memref(): execution_engine.invoke("main", arg1_memref_ptr, arg2_memref_ptr) # test to-numpy utility - # CHECK: [0.5] - npout = ranked_memref_to_numpy(arg2_memref_ptr[0]) - log(npout) + x = ranked_memref_to_numpy(arg2_memref_ptr[0]) + assert len(x) == 1 + assert x[0] == 0.5 if HAS_ML_DTYPES: run(testBF16Memref) +else: + log("TEST: testBF16Memref") # Test f8E5M2 memrefs @@ -606,13 +608,15 @@ def testF8E5M2Memref(): execution_engine.invoke("main", arg1_memref_ptr, arg2_memref_ptr) # test to-numpy utility - # CHECK: [0.5] - npout = ranked_memref_to_numpy(arg2_memref_ptr[0]) - log(npout) + x = ranked_memref_to_numpy(arg2_memref_ptr[0]) + assert len(x) == 1 + assert x[0] == 0.5 if HAS_ML_DTYPES: run(testF8E5M2Memref) +else: + log("TEST: testF8E5M2Memref") # Test addition of two 2d_memref From 7c729418d721147bf1f2b257afd30f84721888ad Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 17 Jan 2025 15:27:48 +0000 Subject: [PATCH 25/88] [llvm][DebugInfo] Attach object-pointer to DISubprogram declarations (#122742) Currently Clang only attaches `DW_AT_object_pointer` to `DW_TAG_subprogram` definitions. LLDB constructs C++ method types from their `DW_TAG_subprogram` declaration, which is also the point at which it needs to determine whether a method is static or not. LLDB's heuristic for this could be very simple if we emitted `DW_AT_object_pointer` on declarations. But since we don't, LLDB has to guess whether an argument is an implicit object parameter based on the DW_AT_name and DW_AT_type. 
To simplify LLDB's job (and to eventually support C++23's explicit object parameters), this patch adds the `DIFlagObjectPointer` to `DISubprogram` declarations. For reference, GCC attaches the object-pointer DIE to both the definition and declaration: https://godbolt.org/z/3TWjTfWon Fixes https://github.com/llvm/llvm-project/issues/120973 --- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 13 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 4 +- llvm/test/DebugInfo/NVPTX/debug-info.ll | 3177 +++++++++-------- .../DebugInfo/X86/DW_AT_object_pointer.ll | 10 +- llvm/test/DebugInfo/X86/dwarf-public-names.ll | 2 +- .../tools/llvm-dwarfdump/X86/statistics.ll | 4 +- 6 files changed, 1647 insertions(+), 1563 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 0a8a1ad38c959..d3450b8b0556f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -849,7 +849,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { } } -void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { +DIE *DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { + // Args[0] is the return type. + DIE *ObjectPointer = nullptr; for (unsigned i = 1, N = Args.size(); i < N; ++i) { const DIType *Ty = Args[i]; if (!Ty) { @@ -860,8 +862,14 @@ void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { addType(Arg, Ty); if (Ty->isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); + if (Ty->isObjectPointer()) { + assert(!ObjectPointer && "Can't have more than one object pointer"); + ObjectPointer = &Arg; + } } } + + return ObjectPointer; } void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) { @@ -1358,7 +1366,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, // Add arguments. Do not add arguments for subprogram definition. 
They will // be handled while processing variables. - constructSubprogramArguments(SPDie, Args); + if (auto *ObjectPointer = constructSubprogramArguments(SPDie, Args)) + addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, *ObjectPointer); } addThrownTypes(SPDie, SP->getThrownTypes()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 163205378fb4b..7a5295d826a48 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -268,7 +268,9 @@ class DwarfUnit : public DIEUnit { void constructContainingTypeDIEs(); /// Construct function argument DIEs. - void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args); + /// + /// \returns DIE of the object pointer if one exists. Nullptr otherwise. + DIE *constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args); /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index c926229f96e38..44884381e082e 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -199,6 +199,8 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 60 // DW_AT_declaration ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 100 // DW_AT_object_pointer +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) @@ -223,6 +225,8 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 60 // DW_AT_declaration ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 100 // DW_AT_object_pointer +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 50 // DW_AT_accessibility @@ -250,6 +254,8 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 11 // 
DW_FORM_data1 ; CHECK-NEXT:.b8 60 // DW_AT_declaration ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 100 // DW_AT_object_pointer +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 50 // DW_AT_accessibility @@ -272,6 +278,8 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 60 // DW_AT_declaration ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 100 // DW_AT_object_pointer +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 50 // DW_AT_accessibility @@ -373,6 +381,8 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 // DW_FORM_data2 ; CHECK-NEXT:.b8 60 // DW_AT_declaration ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 100 // DW_AT_object_pointer +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) @@ -393,6 +403,8 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 60 // DW_AT_declaration ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 100 // DW_AT_object_pointer +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) @@ -727,6 +739,21 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 45 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 58 // DW_AT_decl_file +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 0 // EOM(1) +; 
CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 46 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string @@ -742,7 +769,27 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 46 // Abbreviation Code +; CHECK-NEXT:.b8 47 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 58 // DW_AT_decl_file +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 48 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes ; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name @@ -764,12 +811,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT:.b32 10035 // Length of Unit +; CHECK-NEXT:.b32 10107 // Length of Unit ; CHECK-NEXT:.b8 2 // DWARF version number ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b32 .debug_abbrev // Offset Into Abbrev. 
Section ; CHECK-NEXT:.b8 8 // Address Size (in bytes) -; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0x272c DW_TAG_compile_unit +; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0x2774 DW_TAG_compile_unit ; CHECK-NEXT:.b8 0 // DW_AT_producer ; CHECK-NEXT:.b8 4 // DW_AT_language ; CHECK-NEXT:.b8 0 @@ -804,7 +851,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x31:0x22a DW_TAG_structure_type +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x31:0x23e DW_TAG_structure_type ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -907,7 +954,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 78 // DW_AT_decl_line -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // Abbrev [3] 0x9e:0x4f DW_TAG_subprogram @@ -983,7 +1030,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 79 // DW_AT_decl_line -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // Abbrev [3] 0xed:0x4f DW_TAG_subprogram @@ -1059,10 +1106,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 80 // DW_AT_decl_line -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x13c:0x49 DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x13c:0x4d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -1122,14 +1169,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 83 // 
DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b32 639 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 386 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x17e:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 666 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x182:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 686 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x185:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x189:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -1159,14 +1207,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 429 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x1a5:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 676 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x1ad:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 696 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1ac:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b4:0x30 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -1196,16 +1245,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 472 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x1cc:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 676 // DW_AT_type +; CHECK-NEXT:.b8 5 // 
Abbrev [5] 0x1d8:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 696 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 681 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 701 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 8 // Abbrev [8] 0x1d8:0x43 DW_TAG_subprogram +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x1e4:0x47 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -1258,16 +1308,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 543 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x20f:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 666 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x21f:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 686 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x215:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 681 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x225:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 701 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 9 // Abbrev [9] 0x21b:0x3f DW_TAG_subprogram +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x22b:0x43 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -1316,17 +1367,18 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b32 686 // DW_AT_type +; CHECK-NEXT:.b32 706 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 615 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; 
CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x253:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 666 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x267:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 686 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x25b:0x10 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x26f:0x10 DW_TAG_base_type ; CHECK-NEXT:.b8 117 // DW_AT_name ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 115 @@ -1342,7 +1394,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_encoding ; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x26b:0x2f DW_TAG_structure_type +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x27f:0x2f DW_TAG_structure_type ; CHECK-NEXT:.b8 117 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 @@ -1352,48 +1404,48 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_byte_size ; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 190 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x275:0xc DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x289:0xc DW_TAG_member ; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 192 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x281:0xc DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x295:0xc DW_TAG_member ; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 192 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b8 
11 // Abbrev [11] 0x28d:0xc DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x2a1:0xc DW_TAG_member ; CHECK-NEXT:.b8 122 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 192 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 8 ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x29a:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 671 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x29f:0x5 DW_TAG_const_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2ae:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 691 // DW_AT_type +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x2b3:0x5 DW_TAG_const_type ; CHECK-NEXT:.b32 49 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2a4:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2b8:0x5 DW_TAG_pointer_type ; CHECK-NEXT:.b32 49 // DW_AT_type -; CHECK-NEXT:.b8 14 // Abbrev [14] 0x2a9:0x5 DW_TAG_reference_type -; CHECK-NEXT:.b32 671 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2ae:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x2bd:0x5 DW_TAG_reference_type +; CHECK-NEXT:.b32 691 // DW_AT_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2c2:0x5 DW_TAG_pointer_type ; CHECK-NEXT:.b32 49 // DW_AT_type -; CHECK-NEXT:.b8 15 // Abbrev [15] 0x2b3:0x6 DW_TAG_subprogram +; CHECK-NEXT:.b8 15 // Abbrev [15] 0x2c7:0x6 DW_TAG_subprogram ; CHECK-NEXT:.b32 79 // DW_AT_specification ; CHECK-NEXT:.b8 1 // DW_AT_inline -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x2b9:0x228 DW_TAG_structure_type +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x2cd:0x23c DW_TAG_structure_type ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -1423,7 +1475,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_byte_size ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 88 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x2d7:0x4f 
DW_TAG_subprogram +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x2eb:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -1496,10 +1548,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 89 // DW_AT_decl_line -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x326:0x4f DW_TAG_subprogram +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x33a:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -1572,10 +1624,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 90 // DW_AT_decl_line -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x375:0x4f DW_TAG_subprogram +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x389:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -1648,10 +1700,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 91 // DW_AT_decl_line -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x3c4:0x47 DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x3d8:0x4b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -1709,14 +1761,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 94 // DW_AT_decl_line -; CHECK-NEXT:.b32 1249 // DW_AT_type +; CHECK-NEXT:.b32 1289 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; 
CHECK-NEXT:.b32 1052 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x404:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1425 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x41c:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1477 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x40b:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x423:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -1746,14 +1799,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 1095 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x42b:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1435 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x447:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x432:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x44e:0x30 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -1783,16 +1837,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 1138 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x452:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1435 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x472:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial -; 
CHECK-NEXT:.b8 7 // Abbrev [7] 0x458:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1440 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x478:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1492 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 8 // Abbrev [8] 0x45e:0x43 DW_TAG_subprogram +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x47e:0x47 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -1845,16 +1900,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 1209 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x495:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1425 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x4b9:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1477 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x49b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1440 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x4bf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1492 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 9 // Abbrev [9] 0x4a1:0x3f DW_TAG_subprogram +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x4c5:0x43 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -1903,17 +1959,18 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 96 // DW_AT_decl_line -; CHECK-NEXT:.b32 1445 // DW_AT_type +; CHECK-NEXT:.b32 1497 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 1281 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x4d9:0x6 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1425 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x501:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1477 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 16 // Abbrev [16] 0x4e1:0x9d DW_TAG_structure_type +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x509:0xa9 DW_TAG_structure_type ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 @@ -1923,37 +1980,37 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 161 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 17 // Abbrev [17] 0x4eb:0xd DW_TAG_member +; CHECK-NEXT:.b8 17 // Abbrev [17] 0x513:0xd DW_TAG_member ; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 163 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 17 // Abbrev [17] 0x4f8:0xd DW_TAG_member +; CHECK-NEXT:.b8 17 // Abbrev [17] 0x520:0xd DW_TAG_member ; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 163 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b8 17 // Abbrev [17] 0x505:0xd DW_TAG_member +; CHECK-NEXT:.b8 17 // Abbrev [17] 0x52d:0xd DW_TAG_member ; CHECK-NEXT:.b8 122 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 163 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 8 -; CHECK-NEXT:.b8 18 // Abbrev [18] 
0x512:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 18 // Abbrev [18] 0x53a:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 @@ -1963,18 +2020,19 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 165 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 1353 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x51d:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1406 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x549:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1458 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x523:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 603 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x528:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 603 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x52d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 603 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x533:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x54f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 623 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x554:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 623 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x559:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 623 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 18 // Abbrev [18] 0x55f:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 @@ -1984,14 +2042,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 166 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 1390 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x53e:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1406 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x56e:0x6 
DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1458 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x544:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1411 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x574:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1463 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 19 // Abbrev [19] 0x54a:0x33 DW_TAG_subprogram +; CHECK-NEXT:.b8 19 // Abbrev [19] 0x57a:0x37 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -2029,18 +2088,19 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 167 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 1411 // DW_AT_type +; CHECK-NEXT:.b32 1463 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 1450 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x576:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1406 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x5aa:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1458 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x57e:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1249 // DW_AT_type -; CHECK-NEXT:.b8 20 // Abbrev [20] 0x583:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5b2:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 1289 // DW_AT_type +; CHECK-NEXT:.b8 20 // Abbrev [20] 0x5b7:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 639 // DW_AT_type ; CHECK-NEXT:.b8 117 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 @@ -2050,20 +2110,20 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 127 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x591:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1430 // 
DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x596:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 697 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x59b:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 697 // DW_AT_type -; CHECK-NEXT:.b8 14 // Abbrev [14] 0x5a0:0x5 DW_TAG_reference_type -; CHECK-NEXT:.b32 1430 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5a5:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 697 // DW_AT_type -; CHECK-NEXT:.b8 15 // Abbrev [15] 0x5aa:0x6 DW_TAG_subprogram -; CHECK-NEXT:.b32 727 // DW_AT_specification +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5c5:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 1482 // DW_AT_type +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x5ca:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 717 // DW_AT_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5cf:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 717 // DW_AT_type +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x5d4:0x5 DW_TAG_reference_type +; CHECK-NEXT:.b32 1482 // DW_AT_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5d9:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 717 // DW_AT_type +; CHECK-NEXT:.b8 15 // Abbrev [15] 0x5de:0x6 DW_TAG_subprogram +; CHECK-NEXT:.b32 747 // DW_AT_specification ; CHECK-NEXT:.b8 1 // DW_AT_inline -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x5b0:0x233 DW_TAG_structure_type +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x5e4:0x247 DW_TAG_structure_type ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -2094,7 +2154,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_byte_size ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 66 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x5cf:0x50 DW_TAG_subprogram +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x603:0x50 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -2168,10 +2228,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 67 // DW_AT_decl_line -; CHECK-NEXT:.b32 603 // DW_AT_type +; 
CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x61f:0x50 DW_TAG_subprogram +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x653:0x50 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -2245,10 +2305,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 68 // DW_AT_decl_line -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x66f:0x50 DW_TAG_subprogram +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x6a3:0x50 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -2322,10 +2382,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 69 // DW_AT_decl_line -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x6bf:0x4a DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x6f3:0x4e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -2386,14 +2446,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b32 639 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 1850 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x702:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2019 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x73a:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2091 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of 
Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x709:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x741:0x2c DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -2424,14 +2485,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 1894 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x72a:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2029 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x766:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2101 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x731:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x76d:0x31 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -2462,16 +2524,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 1938 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x752:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2029 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x792:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2101 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x758:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2034 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x798:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2106 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 8 // Abbrev [8] 0x75e:0x44 DW_TAG_subprogram +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x79e:0x48 
DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -2525,16 +2588,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 2010 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x796:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2019 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7da:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2091 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x79c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2034 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x7e0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2106 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 9 // Abbrev [9] 0x7a2:0x40 DW_TAG_subprogram +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x7e6:0x44 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -2584,30 +2648,31 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_decl_file ; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 2039 // DW_AT_type +; CHECK-NEXT:.b32 2111 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b32 2083 // DW_AT_object_pointer ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 3 // DW_AT_accessibility ; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7db:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2019 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x823:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2091 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x7e3:0x5 DW_TAG_pointer_type 
-; CHECK-NEXT:.b32 2024 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x7e8:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 1456 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x7ed:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1456 // DW_AT_type -; CHECK-NEXT:.b8 14 // Abbrev [14] 0x7f2:0x5 DW_TAG_reference_type -; CHECK-NEXT:.b32 2024 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x7f7:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1456 // DW_AT_type -; CHECK-NEXT:.b8 15 // Abbrev [15] 0x7fc:0x6 DW_TAG_subprogram -; CHECK-NEXT:.b32 1487 // DW_AT_specification +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x82b:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 2096 // DW_AT_type +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x830:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x835:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x83a:0x5 DW_TAG_reference_type +; CHECK-NEXT:.b32 2096 // DW_AT_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x83f:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 15 // Abbrev [15] 0x844:0x6 DW_TAG_subprogram +; CHECK-NEXT:.b32 1539 // DW_AT_specification ; CHECK-NEXT:.b8 1 // DW_AT_inline -; CHECK-NEXT:.b8 21 // Abbrev [21] 0x802:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 21 // Abbrev [21] 0x84a:0x32 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 51 @@ -2627,28 +2692,28 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 3 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 1 // DW_AT_inline -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x816:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 22 // Abbrev [22] 0x85e:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x81f:0x9 DW_TAG_formal_parameter +; 
CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 22 // Abbrev [22] 0x867:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x828:0xb DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 22 // Abbrev [22] 0x870:0xb DW_TAG_formal_parameter ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b32 2109 // DW_AT_type +; CHECK-NEXT:.b32 2181 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x834:0x9 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x87c:0x9 DW_TAG_base_type ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 @@ -2657,9 +2722,9 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_encoding ; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x83d:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 23 // Abbrev [23] 0x842:0xd5 DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x885:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x88a:0xd5 DW_TAG_subprogram ; CHECK-NEXT:.b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__func_end0 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_frame_base @@ -2688,7 +2753,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 24 // Abbrev [24] 0x86d:0x10 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 24 // Abbrev [24] 0x8b5:0x10 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 2 // DW_AT_address_class ; CHECK-NEXT:.b8 5 // DW_AT_location ; CHECK-NEXT:.b8 144 @@ -2700,62 
+2765,62 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 25 // Abbrev [25] 0x87d:0xd DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type +; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8c5:0xd DW_TAG_formal_parameter ; CHECK-NEXT:.b32 $L__debug_loc0 // DW_AT_location ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x88a:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 22 // Abbrev [22] 0x8d2:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 2109 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x893:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2181 // DW_AT_type +; CHECK-NEXT:.b8 22 // Abbrev [22] 0x8db:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 2109 // DW_AT_type -; CHECK-NEXT:.b8 26 // Abbrev [26] 0x89c:0xd DW_TAG_variable +; CHECK-NEXT:.b32 2181 // DW_AT_type +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x8e4:0xd DW_TAG_variable ; CHECK-NEXT:.b32 $L__debug_loc1 // DW_AT_location ; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 27 // Abbrev [27] 0x8a9:0x18 DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 691 // DW_AT_abstract_origin +; CHECK-NEXT:.b32 4657 // DW_AT_type +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x8f1:0x18 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 711 // DW_AT_abstract_origin ; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__tmp2 
// DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 11 // DW_AT_call_column -; CHECK-NEXT:.b8 27 // Abbrev [27] 0x8c1:0x18 DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 1450 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x909:0x18 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 1502 // DW_AT_abstract_origin ; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 24 // DW_AT_call_column -; CHECK-NEXT:.b8 27 // Abbrev [27] 0x8d9:0x18 DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 2044 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x921:0x18 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 2116 // DW_AT_abstract_origin ; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__tmp4 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 37 // DW_AT_call_column -; CHECK-NEXT:.b8 28 // Abbrev [28] 0x8f1:0x25 DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 2050 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x939:0x25 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 2122 // DW_AT_abstract_origin ; CHECK-NEXT:.b64 $L__tmp9 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__tmp10 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 8 // DW_AT_call_line ; CHECK-NEXT:.b8 5 // DW_AT_call_column -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x909:0xc DW_TAG_formal_parameter +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x951:0xc DW_TAG_formal_parameter ; CHECK-NEXT:.b8 2 // DW_AT_address_class ; CHECK-NEXT:.b8 5 // DW_AT_location ; CHECK-NEXT:.b8 144 @@ -2763,859 +2828,859 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 204 ; CHECK-NEXT:.b8 149 ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 2079 // DW_AT_abstract_origin +; CHECK-NEXT:.b32 2151 // DW_AT_abstract_origin ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 
// End Of Children Mark -; CHECK-NEXT:.b8 30 // Abbrev [30] 0x917:0x588 DW_TAG_namespace +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x95f:0x588 DW_TAG_namespace ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x91c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x964:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 202 // DW_AT_decl_line -; CHECK-NEXT:.b32 3743 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x923:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3815 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x96b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 203 // DW_AT_decl_line -; CHECK-NEXT:.b32 3787 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x92a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3859 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x972:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 204 // DW_AT_decl_line -; CHECK-NEXT:.b32 3816 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x931:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3888 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x979:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 205 // DW_AT_decl_line -; CHECK-NEXT:.b32 3847 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x938:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3919 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x980:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 206 // DW_AT_decl_line -; CHECK-NEXT:.b32 3876 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x93f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3948 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x987:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 207 // 
DW_AT_decl_line -; CHECK-NEXT:.b32 3907 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x946:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3979 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x98e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 208 // DW_AT_decl_line -; CHECK-NEXT:.b32 3936 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x94d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4008 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x995:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 209 // DW_AT_decl_line -; CHECK-NEXT:.b32 3973 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x954:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4045 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x99c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b32 4004 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x95b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4076 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9a3:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 211 // DW_AT_decl_line -; CHECK-NEXT:.b32 4033 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x962:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4105 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9aa:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 212 // DW_AT_decl_line -; CHECK-NEXT:.b32 4062 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x969:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4134 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9b1:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 213 // DW_AT_decl_line -; CHECK-NEXT:.b32 4105 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x970:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4177 // 
DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9b8:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 4132 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x977:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4204 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9bf:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 215 // DW_AT_decl_line -; CHECK-NEXT:.b32 4161 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x97e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4233 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9c6:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 216 // DW_AT_decl_line -; CHECK-NEXT:.b32 4188 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x985:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4260 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9cd:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 217 // DW_AT_decl_line -; CHECK-NEXT:.b32 4217 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x98c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4289 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9d4:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 218 // DW_AT_decl_line -; CHECK-NEXT:.b32 4244 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x993:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4316 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9db:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 219 // DW_AT_decl_line -; CHECK-NEXT:.b32 4273 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x99a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4345 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9e2:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 220 // 
DW_AT_decl_line -; CHECK-NEXT:.b32 4304 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9a1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4376 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9e9:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 221 // DW_AT_decl_line -; CHECK-NEXT:.b32 4333 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9a8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4405 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9f0:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 222 // DW_AT_decl_line -; CHECK-NEXT:.b32 4368 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9af:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4440 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9f7:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 223 // DW_AT_decl_line -; CHECK-NEXT:.b32 4399 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9b6:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4471 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9fe:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 224 // DW_AT_decl_line -; CHECK-NEXT:.b32 4438 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9bd:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4510 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa05:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 225 // DW_AT_decl_line -; CHECK-NEXT:.b32 4473 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9c4:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4545 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa0c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 226 // DW_AT_decl_line -; CHECK-NEXT:.b32 4508 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9cb:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4580 // 
DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa13:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 227 // DW_AT_decl_line -; CHECK-NEXT:.b32 4543 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9d2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4615 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa1a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 228 // DW_AT_decl_line -; CHECK-NEXT:.b32 4592 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9d9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4664 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa21:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 229 // DW_AT_decl_line -; CHECK-NEXT:.b32 4635 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9e0:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4707 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa28:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b32 4672 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9e7:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4744 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa2f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 231 // DW_AT_decl_line -; CHECK-NEXT:.b32 4703 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9ee:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4775 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa36:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 232 // DW_AT_decl_line -; CHECK-NEXT:.b32 4748 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9f5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4820 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa3d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 233 // 
DW_AT_decl_line -; CHECK-NEXT:.b32 4793 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x9fc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4865 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa44:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 234 // DW_AT_decl_line -; CHECK-NEXT:.b32 4849 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa03:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4921 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa4b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 235 // DW_AT_decl_line -; CHECK-NEXT:.b32 4880 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa0a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4952 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa52:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 236 // DW_AT_decl_line -; CHECK-NEXT:.b32 4919 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa11:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4991 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa59:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 237 // DW_AT_decl_line -; CHECK-NEXT:.b32 4969 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa18:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5041 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa60:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 238 // DW_AT_decl_line -; CHECK-NEXT:.b32 5023 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa1f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5095 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa67:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 239 // DW_AT_decl_line -; CHECK-NEXT:.b32 5054 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa26:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5126 // 
DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa6e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 240 // DW_AT_decl_line -; CHECK-NEXT:.b32 5091 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa2d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5163 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa75:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b32 5141 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa34:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5213 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa7c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 242 // DW_AT_decl_line -; CHECK-NEXT:.b32 5182 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa3b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5254 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa83:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 243 // DW_AT_decl_line -; CHECK-NEXT:.b32 5219 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa42:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5291 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa8a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 244 // DW_AT_decl_line -; CHECK-NEXT:.b32 5252 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa49:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5324 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa91:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b32 5283 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa50:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5355 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa98:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 246 // 
DW_AT_decl_line -; CHECK-NEXT:.b32 5316 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa57:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5388 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa9f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 247 // DW_AT_decl_line -; CHECK-NEXT:.b32 5343 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa5e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5415 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xaa6:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 248 // DW_AT_decl_line -; CHECK-NEXT:.b32 5374 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa65:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5446 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xaad:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 249 // DW_AT_decl_line -; CHECK-NEXT:.b32 5405 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa6c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5477 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xab4:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b32 5434 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa73:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5506 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xabb:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 251 // DW_AT_decl_line -; CHECK-NEXT:.b32 5463 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa7a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5535 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xac2:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b32 5494 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa81:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5566 // 
DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xac9:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b32 5527 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa88:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5599 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xad0:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 254 // DW_AT_decl_line -; CHECK-NEXT:.b32 5562 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xa8f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5634 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xad7:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 255 // DW_AT_decl_line -; CHECK-NEXT:.b32 5598 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xa96:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5670 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xade:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 0 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5655 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xa9e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5727 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xae6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 1 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5686 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xaa6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5758 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xaee:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 2 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5725 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xaae:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5797 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xaf6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 3 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5770 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xab6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5842 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xafe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 4 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5803 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xabe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5875 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb06:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5848 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xac6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5920 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb0e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5894 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xace:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5966 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb16:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 7 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5923 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xad6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5995 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb1e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 8 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5954 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xade:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6026 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb26:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 9 // DW_AT_decl_line ; 
CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5995 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xae6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6067 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb2e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 10 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6034 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xaee:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6106 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb36:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6069 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xaf6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6141 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb3e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 12 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6096 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xafe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6168 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb46:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 13 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6125 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb06:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6197 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb4e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 14 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6154 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb0e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6226 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb56:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 15 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6181 // 
DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb16:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6253 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb5e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 16 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6210 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb1e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6282 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb66:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 17 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6243 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb26:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6315 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb6e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b32 6274 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb2d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6346 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb75:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 6294 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb34:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6366 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb7c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 6314 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb3b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6386 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb83:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 159 // DW_AT_decl_line -; CHECK-NEXT:.b32 6334 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb42:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6406 // DW_AT_import 
+; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb8a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 180 // DW_AT_decl_line -; CHECK-NEXT:.b32 6360 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb49:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6432 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb91:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 199 // DW_AT_decl_line -; CHECK-NEXT:.b32 6380 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb50:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6452 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb98:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 218 // DW_AT_decl_line -; CHECK-NEXT:.b32 6399 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb57:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6471 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xb9f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 237 // DW_AT_decl_line -; CHECK-NEXT:.b32 6419 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb5e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6491 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xba6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 0 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6438 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb66:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6510 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbae:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 19 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6458 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb6e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6530 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbb6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 
38 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6479 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb76:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6551 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbbe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6504 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb7e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6576 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbc6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 78 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6530 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb86:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6602 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbce:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 97 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6556 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb8e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6628 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbd6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 116 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6575 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb96:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6647 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbde:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 135 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6596 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xb9e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6668 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbe6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 147 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; 
CHECK-NEXT:.b32 6626 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xba6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6698 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbee:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 184 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6650 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbae:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6722 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbf6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 203 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6669 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbb6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6741 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbfe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 222 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6689 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbbe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6761 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xc06:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 241 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6709 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xbc6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6781 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xc0e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 4 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 6728 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xbce:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6800 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc16:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b32 6748 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev 
[31] 0xbd5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6820 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc1d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 119 // DW_AT_decl_line -; CHECK-NEXT:.b32 6763 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xbdc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6835 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc24:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 6811 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xbe3:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6883 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc2b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 122 // DW_AT_decl_line -; CHECK-NEXT:.b32 6824 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xbea:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6896 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc32:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 123 // DW_AT_decl_line -; CHECK-NEXT:.b32 6844 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xbf1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6916 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc39:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 129 // DW_AT_decl_line -; CHECK-NEXT:.b32 6873 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xbf8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6945 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc40:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 130 // DW_AT_decl_line -; CHECK-NEXT:.b32 6893 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xbff:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6965 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc47:0x7 DW_TAG_imported_declaration ; 
CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 131 // DW_AT_decl_line -; CHECK-NEXT:.b32 6914 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc06:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6986 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc4e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 132 // DW_AT_decl_line -; CHECK-NEXT:.b32 6935 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc0d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7007 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc55:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 133 // DW_AT_decl_line -; CHECK-NEXT:.b32 7063 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc14:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7135 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc5c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 134 // DW_AT_decl_line -; CHECK-NEXT:.b32 7091 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc1b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7163 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc63:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 135 // DW_AT_decl_line -; CHECK-NEXT:.b32 7116 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc22:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7188 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc6a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 136 // DW_AT_decl_line -; CHECK-NEXT:.b32 7134 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc29:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7206 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc71:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 137 // DW_AT_decl_line -; CHECK-NEXT:.b32 7151 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc30:0x7 
DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7223 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc78:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 138 // DW_AT_decl_line -; CHECK-NEXT:.b32 7179 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc37:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7251 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc7f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 139 // DW_AT_decl_line -; CHECK-NEXT:.b32 7200 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc3e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7272 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc86:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 7226 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc45:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7298 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc8d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 142 // DW_AT_decl_line -; CHECK-NEXT:.b32 7249 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc4c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7321 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc94:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 143 // DW_AT_decl_line -; CHECK-NEXT:.b32 7276 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc53:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7348 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc9b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 144 // DW_AT_decl_line -; CHECK-NEXT:.b32 7327 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc5a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7399 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xca2:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 146 // DW_AT_decl_line -; CHECK-NEXT:.b32 7360 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc61:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7432 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xca9:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 152 // DW_AT_decl_line -; CHECK-NEXT:.b32 7393 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc68:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7465 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcb0:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b32 7408 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc6f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7480 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcb7:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 154 // DW_AT_decl_line -; CHECK-NEXT:.b32 7437 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc76:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7509 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcbe:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b32 7455 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc7d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7527 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcc5:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b32 7487 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc84:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7559 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xccc:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 157 // DW_AT_decl_line -; CHECK-NEXT:.b32 7519 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc8b:0x7 
DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7591 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcd3:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 158 // DW_AT_decl_line -; CHECK-NEXT:.b32 7552 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc92:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7624 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcda:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 160 // DW_AT_decl_line -; CHECK-NEXT:.b32 7575 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xc99:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7647 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xce1:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b32 7620 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xca0:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7692 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xce8:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b32 7768 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xca7:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7840 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcef:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 243 // DW_AT_decl_line -; CHECK-NEXT:.b32 7817 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcae:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7889 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcf6:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b32 7836 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcb5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7908 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcfd:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 246 // DW_AT_decl_line -; CHECK-NEXT:.b32 7722 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcbc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7794 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xd04:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 247 // DW_AT_decl_line -; CHECK-NEXT:.b32 7858 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcc3:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7930 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xd0b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 249 // DW_AT_decl_line -; CHECK-NEXT:.b32 7885 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcca:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7957 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xd12:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b32 8000 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcd1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8072 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xd19:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 251 // DW_AT_decl_line -; CHECK-NEXT:.b32 7907 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcd8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7979 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xd20:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b32 7940 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xcdf:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8012 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xd27:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b32 8027 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xce6:0x8 
DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8099 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd2e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 149 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8070 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xcee:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8142 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd36:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 150 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8102 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xcf6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8174 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd3e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 151 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8136 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xcfe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8208 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd46:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 152 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8168 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd06:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8240 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd4e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 153 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8202 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd0e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8274 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd56:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 154 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8242 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd16:0x8 DW_TAG_imported_declaration +; 
CHECK-NEXT:.b32 8314 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd5e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 155 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8274 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd1e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8346 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd66:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 156 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8308 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd26:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8380 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd6e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 157 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8340 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd2e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8412 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd76:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 158 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8372 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd36:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8444 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd7e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 159 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8418 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd3e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8490 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd86:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 160 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8448 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd46:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8520 // DW_AT_import +; 
CHECK-NEXT:.b8 32 // Abbrev [32] 0xd8e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 161 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8480 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd4e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8552 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd96:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 162 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8512 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd56:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8584 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd9e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 163 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8542 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd5e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8614 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xda6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 164 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8574 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd66:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8646 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdae:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 165 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8604 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd6e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8676 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdb6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 166 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8638 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd76:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8710 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdbe:0x8 
DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 167 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8670 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd7e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8742 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdc6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 168 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8708 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd86:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8780 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdce:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 169 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8742 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd8e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8814 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdd6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 170 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8784 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd96:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8856 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdde:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 171 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8822 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xd9e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8894 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xde6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 172 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8860 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xda6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8932 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdee:0x8 DW_TAG_imported_declaration ; 
CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 173 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8898 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdae:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8970 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdf6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 174 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8939 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdb6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9011 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdfe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 175 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8979 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdbe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9051 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe06:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 176 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9013 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdc6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9085 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe0e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 177 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9053 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdce:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9125 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe16:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 178 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9089 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdd6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9161 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe1e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 179 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9125 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdde:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9197 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe26:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 180 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9163 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xde6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9235 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe2e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 181 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9197 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdee:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9269 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe36:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 182 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9231 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdf6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9303 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe3e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 183 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9263 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xdfe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9335 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe46:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 184 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9295 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe06:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9367 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe4e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 185 // DW_AT_decl_line ; 
CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9325 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe0e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9397 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe56:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 186 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9359 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe16:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9431 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe5e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 187 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9395 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe1e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9467 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe66:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 188 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9434 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe26:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9506 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe6e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 189 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9477 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe2e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9549 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe76:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 190 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9526 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe36:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9598 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe7e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 191 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9562 // 
DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe3e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9634 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe86:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 192 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9611 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe46:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9683 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe8e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 193 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9660 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe4e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9732 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe96:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 194 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9692 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe56:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9764 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe9e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 195 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9726 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe5e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9798 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xea6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 196 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9770 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe66:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9842 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xeae:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 197 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9812 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev 
[32] 0xe6e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9884 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xeb6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 198 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9842 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe76:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9914 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xebe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 199 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9874 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe7e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9946 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xec6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 200 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9906 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe86:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9978 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xece:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 201 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9936 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe8e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 10008 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xed6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 202 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9968 // DW_AT_import -; CHECK-NEXT:.b8 32 // Abbrev [32] 0xe96:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 10040 // DW_AT_import +; CHECK-NEXT:.b8 32 // Abbrev [32] 0xede:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 203 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 10004 // DW_AT_import +; CHECK-NEXT:.b32 10076 // DW_AT_import ; CHECK-NEXT:.b8 0 // End Of 
Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xe9f:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0xee7:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3631,12 +3696,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 44 // DW_AT_decl_line -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xeb4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xefc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0xeba:0x11 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0xf02:0x11 DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -3653,7 +3718,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xecb:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0xf13:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3671,12 +3736,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 46 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xee2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf2a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xee8:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0xf30:0x1f DW_TAG_subprogram ; 
CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3696,12 +3761,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 48 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf01:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf49:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xf07:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0xf4f:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3719,12 +3784,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 50 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf1e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf66:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xf24:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0xf6c:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3744,12 +3809,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 52 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf3d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf85:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xf43:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0xf8b:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3767,12 +3832,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 56 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf5a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfa2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xf60:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0xfa8:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3793,14 +3858,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 54 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf7a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf7f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfc2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfc7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xf85:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0xfcd:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 
; CHECK-NEXT:.b8 76 @@ -3820,12 +3885,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 58 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf9e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfe6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xfa4:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0xfec:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3843,12 +3908,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 60 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfbb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1003:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xfc1:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1009:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3866,12 +3931,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 62 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfd8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1020:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; 
CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0xfde:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1026:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3898,14 +3963,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 64 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xffe:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1003:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1046:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x104b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1009:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1051:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3921,12 +3986,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 66 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x101e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1066:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1024:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x106c:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3944,12 +4009,12 @@ if.end: ; 
preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 68 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x103b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1083:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1041:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1089:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3965,12 +4030,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1056:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x109e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x105c:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x10a4:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3988,12 +4053,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 70 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1073:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10bb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; 
CHECK-NEXT:.b8 33 // Abbrev [33] 0x1079:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x10c1:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4009,12 +4074,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 76 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x108e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10d6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1094:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x10dc:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4032,12 +4097,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10ab:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10f3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x10b1:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x10f9:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4057,12 +4122,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 78 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 
7 // Abbrev [7] 0x10ca:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1112:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x10d0:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1118:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4080,12 +4145,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 80 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10e7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x112f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x10ed:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1135:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4104,14 +4169,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 82 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1105:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x110a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x114d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1152:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1110:0x1f 
DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1158:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4131,12 +4196,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 84 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1129:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1171:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x112f:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1177:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4154,16 +4219,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 86 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1146:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x114b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1150:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x118e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1193:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1198:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1156:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x119e:0x23 
DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4182,14 +4247,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 88 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x116e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1173:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11bb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1179:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x11c1:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4208,14 +4273,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 90 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1191:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1196:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x119c:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x11e4:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; 
CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4234,14 +4299,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 92 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11fc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1201:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x11bf:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1207:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4272,19 +4337,19 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 94 // DW_AT_decl_line -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11e3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x122b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x11e9:0x7 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1231:0x7 DW_TAG_base_type ; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x11f0:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1238:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4306,16 +4371,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 96 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x120b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1210:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4630 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1253:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1258:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4702 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1216:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x121b:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x125e:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4657 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1263:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4336,14 +4401,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 98 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1235:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x123a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x127d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1282:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 
// Abbrev [33] 0x1240:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1288:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4363,12 +4428,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1259:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12a1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x125f:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x12a7:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4394,12 +4459,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x127e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12c6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1284:0x8 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x12cc:0x8 DW_TAG_base_type ; CHECK-NEXT:.b8 98 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 111 @@ -4407,7 +4472,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_encoding ; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x128c:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x12d4:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4436,14 +4501,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12ae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12b3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12f6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12fb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x12b9:0x38 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1301:0x38 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4483,14 +4548,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12e6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12eb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x132e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1333:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x12f1:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1339:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4510,12 
+4575,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 108 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x130a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1352:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1310:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1358:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4538,14 +4603,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 112 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x132c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1331:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1374:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1379:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1337:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x137f:0x32 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4579,14 +4644,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 111 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev 
[7] 0x135e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1363:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13a6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13ab:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1369:0x36 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x13b1:0x36 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4624,14 +4689,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 114 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1394:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1399:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13dc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13e1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x139f:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x13e7:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4651,12 +4716,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 116 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13b8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // 
DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1400:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x13be:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1406:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4682,12 +4747,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13dd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1425:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x13e3:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x142b:0x32 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4721,14 +4786,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 120 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x140a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x140f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1452:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1457:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1415:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x145d:0x1d 
DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4746,12 +4811,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x142c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1474:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1432:0xc DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x147a:0xc DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -4763,7 +4828,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x143e:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1486:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4784,14 +4849,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 123 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1458:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x145d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14a0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14a5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1463:0x21 
DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x14ab:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4813,12 +4878,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 125 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x147e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14c6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1484:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x14cc:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4838,12 +4903,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 126 // DW_AT_decl_line -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x149d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14e5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x14a3:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x14eb:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4865,12 +4930,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 128 // DW_AT_decl_line -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14be:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1506:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x14c4:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x150c:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4886,12 +4951,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 138 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14d9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1521:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x14df:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1527:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4911,12 +4976,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 130 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14f8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1540:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x14fe:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1546:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4936,12 +5001,12 @@ if.end: ; preds = %if.then, %entry ; 
CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 132 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1517:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x155f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x151d:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1565:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4959,12 +5024,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 134 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1534:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x157c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x153a:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1582:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4982,12 +5047,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 136 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1551:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1599:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 
0x1557:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x159f:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5007,12 +5072,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1570:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15b8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1576:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x15be:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5034,12 +5099,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 142 // DW_AT_decl_line -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1591:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1597:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x15df:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5063,12 +5128,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 143 // DW_AT_decl_line -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15b4:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15fc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x15ba:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1602:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5088,14 +5153,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 145 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2109 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x161b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1620:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2181 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x15de:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1626:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5113,12 +5178,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 146 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15f5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x163d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x15fb:0xa DW_TAG_base_type +; 
CHECK-NEXT:.b8 10 // Abbrev [10] 0x1643:0xa DW_TAG_base_type ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 117 @@ -5128,11 +5193,11 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1605:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5642 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x160a:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 5647 // DW_AT_type -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x160f:0x8 DW_TAG_base_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x164d:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5714 // DW_AT_type +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1652:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 5719 // DW_AT_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1657:0x8 DW_TAG_base_type ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 97 @@ -5140,7 +5205,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 8 // DW_AT_encoding ; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1617:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x165f:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5160,12 +5225,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 147 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1630:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1678:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1636:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x167e:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; 
CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5193,12 +5258,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 149 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1657:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x169f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x165d:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x16a5:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5227,14 +5292,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 151 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x167f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1684:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16c7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16cc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x168a:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x16d2:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5251,14 +5316,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 
// DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16a0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16a5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16e8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16ed:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x16ab:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x16f3:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5287,14 +5352,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 157 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16cd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16d2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1715:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x171a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x16d8:0x2e DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1720:0x2e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5319,16 +5384,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 159 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16f6:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16fb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1700:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4630 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x173e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1743:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1748:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4702 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1706:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x174e:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5346,12 +5411,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x171d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1765:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1723:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x176b:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5371,12 +5436,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x173c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; 
CHECK-NEXT:.b8 7 // Abbrev [7] 0x1784:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1742:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x178a:0x29 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5401,14 +5466,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 165 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1760:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1765:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17a8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17ad:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x176b:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x17b3:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5431,14 +5496,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 167 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1787:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x178c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17cf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17d4:0x5 DW_TAG_formal_parameter +; 
CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1792:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x17da:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5462,12 +5527,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 169 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4812 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17af:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17f7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x17b5:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x17fd:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5483,12 +5548,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 171 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17ca:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1812:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x17d0:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1818:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5506,12 +5571,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 173 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; 
CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17e7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x182f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x17ed:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1835:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5529,12 +5594,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 175 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1804:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x184c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x180a:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1852:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5550,12 +5615,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 177 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x181f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1867:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1825:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x186d:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5573,12 +5638,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 179 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x183c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1884:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1842:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x188a:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5600,12 +5665,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 181 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x185d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18a5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1863:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x18ab:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5625,12 +5690,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 183 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x187c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18c4:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1882:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x18ca:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 @@ -5638,13 +5703,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 54 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1890:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18d8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1896:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x18de:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 @@ -5652,13 +5717,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 56 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18a4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18ec:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x18aa:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x18f2:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 @@ -5666,13 +5731,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 58 // 
DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18b8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1900:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x18be:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1906:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 @@ -5681,15 +5746,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 60 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18cd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18d2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1915:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x191a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x18d8:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1920:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 105 @@ -5697,26 +5762,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 178 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18e6:0x5 DW_TAG_formal_parameter 
-; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x192e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x18ec:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1934:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 63 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18f9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1941:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x18ff:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1947:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 115 @@ -5724,26 +5789,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x190d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1955:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1913:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x195b:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 101 // DW_AT_name ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b32 
5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1920:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1968:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1926:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x196e:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 98 @@ -5751,13 +5816,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 181 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1934:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x197c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x193a:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1982:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 @@ -5766,13 +5831,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 184 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1949:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1991:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x194f:0x19 
DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1997:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 111 @@ -5780,15 +5845,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x195d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1962:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19a5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19aa:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1968:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x19b0:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 @@ -5797,15 +5862,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 103 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1977:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x197c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4630 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19bf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19c4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4702 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1982:0x1a DW_TAG_subprogram +; 
CHECK-NEXT:.b8 34 // Abbrev [34] 0x19ca:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 101 @@ -5814,28 +5879,28 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1991:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1996:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x199c:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x19e4:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19a9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19f1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x19af:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x19f7:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 103 @@ -5844,13 +5909,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 112 // 
DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19be:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a06:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x19c4:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a0c:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 @@ -5858,45 +5923,45 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 115 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x19dd:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x19e2:0x18 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a1a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a1f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6693 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1a25:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5699 // DW_AT_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a2a:0x18 DW_TAG_subprogram ; CHECK-NEXT:.b8 112 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 119 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 
// DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19ef:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19f4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a37:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a3c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x19fa:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a42:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 65 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a07:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a4f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a0d:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a55:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 @@ -5904,13 +5969,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a1b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a63:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a21:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a69:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 113 ; CHECK-NEXT:.b8 114 @@ -5918,26 +5983,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a2f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a77:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a35:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a7d:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 67 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a42:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a8a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a48:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a90:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 @@ -5945,14 +6010,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 76 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; 
CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a56:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a9e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1a5c:0xd DW_TAG_typedef -; CHECK-NEXT:.b32 6761 // DW_AT_type +; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1aa4:0xd DW_TAG_typedef +; CHECK-NEXT:.b32 6833 // DW_AT_type ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 118 @@ -5961,10 +6026,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 101 // DW_AT_decl_line -; CHECK-NEXT:.b8 36 // Abbrev [36] 0x1a69:0x2 DW_TAG_structure_type +; CHECK-NEXT:.b8 36 // Abbrev [36] 0x1ab1:0x2 DW_TAG_structure_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1a6b:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 6777 // DW_AT_type +; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1ab3:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 6849 // DW_AT_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 @@ -5974,35 +6039,35 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1a79:0x22 DW_TAG_structure_type +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ac1:0x22 DW_TAG_structure_type ; CHECK-NEXT:.b8 16 // DW_AT_byte_size ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a7d:0xf DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1ac5:0xf DW_TAG_member ; CHECK-NEXT:.b8 113 // DW_AT_name ; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5242 // DW_AT_type ; 
CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 107 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a8c:0xe DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1ad4:0xe DW_TAG_member ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 108 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 8 ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 38 // Abbrev [38] 0x1a9b:0xd DW_TAG_subprogram +; CHECK-NEXT:.b8 38 // Abbrev [38] 0x1ae3:0xd DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 111 @@ -6015,7 +6080,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1aa8:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1af0:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 @@ -6023,13 +6088,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 7 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ab6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1afe:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1abc:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b04:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 
116 ; CHECK-NEXT:.b8 101 @@ -6040,16 +6105,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 7 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1acd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6867 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b15:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6939 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1ad3:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 6872 // DW_AT_type -; CHECK-NEXT:.b8 40 // Abbrev [40] 0x1ad8:0x1 DW_TAG_subroutine_type -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1ad9:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b1b:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 6944 // DW_AT_type +; CHECK-NEXT:.b8 40 // Abbrev [40] 0x1b20:0x1 DW_TAG_subroutine_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1b21:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -6057,13 +6122,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 9 // DW_AT_decl_file ; CHECK-NEXT:.b8 26 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ae7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b2f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1aed:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b35:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -6072,13 +6137,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 
// DW_AT_decl_file ; CHECK-NEXT:.b8 22 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1afc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b44:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b02:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b4a:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -6087,13 +6152,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 27 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b11:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b59:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1b17:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1b5f:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 98 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 101 @@ -6104,26 +6169,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 10 // DW_AT_decl_file ; CHECK-NEXT:.b8 20 // DW_AT_decl_line -; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b32 7050 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b28:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b2d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; 
CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b32:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b37:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b3c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7020 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 41 // Abbrev [41] 0x1b42:0x1 DW_TAG_pointer_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b43:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 6984 // DW_AT_type -; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1b48:0x1 DW_TAG_const_type -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1b49:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 6999 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b70:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7051 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b75:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7051 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b7a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b7f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b84:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7092 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 41 // Abbrev [41] 0x1b8a:0x1 DW_TAG_pointer_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b8b:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7056 // DW_AT_type +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1b90:0x1 DW_TAG_const_type +; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1b91:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 7071 // DW_AT_type ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 122 @@ -6133,7 +6198,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 11 // DW_AT_decl_file ; CHECK-NEXT:.b8 62 // DW_AT_decl_line -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1b57:0x15 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1b9f:0x15 DW_TAG_base_type ; CHECK-NEXT:.b8 108 // 
DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -6154,8 +6219,8 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 20 // Abbrev [20] 0x1b6c:0x16 DW_TAG_typedef -; CHECK-NEXT:.b32 7042 // DW_AT_type +; CHECK-NEXT:.b8 20 // Abbrev [20] 0x1bb4:0x16 DW_TAG_typedef +; CHECK-NEXT:.b32 7114 // DW_AT_type ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -6173,16 +6238,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 230 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b82:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7047 // DW_AT_type -; CHECK-NEXT:.b8 43 // Abbrev [43] 0x1b87:0x10 DW_TAG_subroutine_type -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b8c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b91:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b97:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1bca:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7119 // DW_AT_type +; CHECK-NEXT:.b8 43 // Abbrev [43] 0x1bcf:0x10 DW_TAG_subroutine_type +; CHECK-NEXT:.b32 4657 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bd4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7051 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bd9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7051 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1bdf:0x1c DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 @@ -6193,15 +6258,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 212 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b32 
7050 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ba8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bad:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bf0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bf5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1bb3:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1bfb:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 118 @@ -6209,15 +6274,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 21 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6748 // DW_AT_type +; CHECK-NEXT:.b32 6820 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c09:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c0e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1bcc:0x12 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1c14:0x12 DW_TAG_subprogram ; CHECK-NEXT:.b8 101 // DW_AT_name ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 105 @@ -6229,10 +6294,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bd8:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c20:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1bde:0x11 DW_TAG_subprogram +; CHECK-NEXT:.b8 45 // Abbrev [45] 0x1c26:0x11 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 @@ -6243,10 +6308,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1be9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c31:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7050 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1bef:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c37:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 103 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 116 @@ -6257,15 +6322,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 52 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 7174 // DW_AT_type +; CHECK-NEXT:.b32 7246 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c00:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c48:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c06:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5647 // DW_AT_type -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c0b:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c4e:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5719 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c53:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; 
CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 98 @@ -6274,13 +6339,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 8 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c1a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c62:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c20:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c68:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 @@ -6289,15 +6354,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 23 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6763 // DW_AT_type +; CHECK-NEXT:.b32 6835 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c2f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c34:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c77:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5242 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c7c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c3a:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c82:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 @@ -6308,13 +6373,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 210 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6978 // DW_AT_type +; 
CHECK-NEXT:.b32 7050 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c4b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c93:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c51:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c99:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 @@ -6324,15 +6389,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 95 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c61:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c66:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ca9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cae:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c6c:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1cb4:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 @@ -6345,19 +6410,19 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 106 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b32 7057 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c7f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7311 // DW_AT_type -; CHECK-NEXT:.b8 7 
// Abbrev [7] 0x1c84:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c89:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c8f:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7316 // DW_AT_type -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1c94:0xb DW_TAG_base_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cc7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7383 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ccc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1cd7:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7388 // DW_AT_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1cdc:0xb DW_TAG_base_type ; CHECK-NEXT:.b8 119 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 104 @@ -6368,7 +6433,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1c9f:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1ce7:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 116 @@ -6379,17 +6444,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 98 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7311 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cba:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // 
DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1cc0:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cf8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7383 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cfd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d02:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 45 // Abbrev [45] 0x1d08:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 113 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 111 @@ -6401,16 +6466,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ccc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6978 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cdb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7020 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d14:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7050 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d19:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d1e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d23:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7092 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 45 // Abbrev [45] 0x1ce1:0xf DW_TAG_subprogram +; CHECK-NEXT:.b8 46 // Abbrev [46] 0x1d29:0xf DW_TAG_subprogram ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 @@ -6419,10 +6484,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 118 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1cf0:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1d38:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 @@ -6434,15 +6499,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 224 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b32 7050 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d02:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6978 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d07:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d4a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7050 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d4f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1d0d:0x12 DW_TAG_subprogram +; CHECK-NEXT:.b8 45 // Abbrev [45] 0x1d55:0x12 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 97 @@ -6454,10 +6519,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d19:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 603 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d61:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 623 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1d1f:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1d67:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 
116 ; CHECK-NEXT:.b8 114 @@ -6467,17 +6532,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 164 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5699 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d2f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d34:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1d3a:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7174 // DW_AT_type -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1d3f:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d77:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d7c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1d82:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7246 // DW_AT_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1d87:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6487,17 +6552,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 183 // DW_AT_decl_line -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d4f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d54:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d59:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1d5f:0x21 
DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d97:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d9c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7554 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1da1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1da7:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6508,17 +6573,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b32 6999 // DW_AT_type +; CHECK-NEXT:.b32 7071 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d70:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d75:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d7a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1d80:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1db8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dbd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7554 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dc2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1dc8:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 115 @@ -6529,13 +6594,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 205 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // 
DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d91:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dd9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1d97:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1ddf:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 119 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 115 @@ -6548,21 +6613,21 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 109 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b32 7057 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1daa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7174 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1daf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7610 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1db4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1dba:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7615 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1dbf:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 7316 // DW_AT_type -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1dc4:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1df2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7246 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1df7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7682 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dfc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7057 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1e02:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7687 // DW_AT_type +; CHECK-NEXT:.b8 13 // 
Abbrev [13] 0x1e07:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 7388 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1e0c:0x1c DW_TAG_subprogram ; CHECK-NEXT:.b8 119 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 116 @@ -6573,15 +6638,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 102 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dd5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7174 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dda:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7316 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e1d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7246 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e22:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7388 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 30 // Abbrev [30] 0x1de0:0x78 DW_TAG_namespace +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x1e28:0x78 DW_TAG_namespace ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 103 @@ -6592,43 +6657,43 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1deb:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e33:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 201 // DW_AT_decl_line -; CHECK-NEXT:.b32 7768 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1df2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7840 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e3a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 207 // DW_AT_decl_line -; CHECK-NEXT:.b32 7817 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1df9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 
7889 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e41:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 211 // DW_AT_decl_line -; CHECK-NEXT:.b32 7836 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e00:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7908 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e48:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 217 // DW_AT_decl_line -; CHECK-NEXT:.b32 7858 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e07:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7930 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e4f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 228 // DW_AT_decl_line -; CHECK-NEXT:.b32 7885 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e0e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7957 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e56:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 229 // DW_AT_decl_line -; CHECK-NEXT:.b32 7907 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e15:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7979 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e5d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b32 7940 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e1c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8012 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e64:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 232 // DW_AT_decl_line -; CHECK-NEXT:.b32 8000 // DW_AT_import -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e23:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8072 // DW_AT_import +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1e6b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 
233 // DW_AT_decl_line -; CHECK-NEXT:.b32 8027 // DW_AT_import -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e2a:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b32 8099 // DW_AT_import +; CHECK-NEXT:.b8 47 // Abbrev [47] 0x1e72:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -6656,17 +6721,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 7768 // DW_AT_type +; CHECK-NEXT:.b32 7840 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e4c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e51:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e94:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3842 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e99:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1e58:0xf DW_TAG_typedef -; CHECK-NEXT:.b32 7783 // DW_AT_type +; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1ea0:0xf DW_TAG_typedef +; CHECK-NEXT:.b32 7855 // DW_AT_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 100 @@ -6677,35 +6742,35 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1e67:0x22 DW_TAG_structure_type +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1eaf:0x22 DW_TAG_structure_type ; CHECK-NEXT:.b8 16 // DW_AT_byte_size ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 117 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e6b:0xf DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1eb3:0xf DW_TAG_member ; CHECK-NEXT:.b8 113 // DW_AT_name ; 
CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 119 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e7a:0xe DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1ec2:0xe DW_TAG_member ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 120 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 8 ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1e89:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1ed1:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 69 ; CHECK-NEXT:.b8 120 @@ -6718,10 +6783,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e96:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ede:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1e9c:0x16 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1ee4:0x16 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 97 @@ -6731,13 +6796,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 12 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 
// Abbrev [7] 0x1eac:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1eb2:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1efa:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 100 @@ -6747,15 +6812,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 29 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 7768 // DW_AT_type +; CHECK-NEXT:.b32 7840 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f0a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3842 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f0f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1ecd:0x16 DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1f15:0x16 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -6765,13 +6830,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 36 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1edd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f25:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark 
-; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1ee3:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1f2b:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6782,17 +6847,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 209 // DW_AT_decl_line -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1efe:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1f04:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f3c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f41:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7554 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f46:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1f4c:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6804,17 +6869,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 7974 // DW_AT_type +; CHECK-NEXT:.b32 8046 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f16:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f1b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type -; CHECK-NEXT:.b8 7 
// Abbrev [7] 0x1f20:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f26:0x1a DW_TAG_base_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f5e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f63:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7554 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f68:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f6e:0x1a DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -6840,7 +6905,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1f40:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1f88:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6850,15 +6915,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 172 // DW_AT_decl_line -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f50:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f55:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f98:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f9d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1f5b:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1fa3:0x1c DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // 
DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6869,15 +6934,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 175 // DW_AT_decl_line -; CHECK-NEXT:.b32 8055 // DW_AT_type +; CHECK-NEXT:.b32 8127 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f6c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f71:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fb4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5709 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fb9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f77:0xf DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1fbf:0xf DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -6892,7 +6957,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x1f86:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x1fce:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6913,12 +6978,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 62 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fa0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fe8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x1fa6:0x22 
DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x1fee:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6941,12 +7006,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 90 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fc2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x200a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x1fc8:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2010:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6967,12 +7032,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 57 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fe2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x202a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x1fe8:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2030:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6995,12 +7060,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 95 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2004:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x204c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x200a:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2052:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7024,14 +7089,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 47 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2027:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x202c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x206f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2074:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2032:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x207a:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7052,12 +7117,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 52 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x204c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2094:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2052:0x22 DW_TAG_subprogram +; 
CHECK-NEXT:.b8 48 // Abbrev [48] 0x209a:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7080,12 +7145,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 100 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x206e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20b6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2074:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x20bc:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7106,12 +7171,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 150 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x208e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2094:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x20dc:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7132,12 +7197,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 14 // DW_AT_decl_file ; CHECK-NEXT:.b8 155 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20ae:0x5 DW_TAG_formal_parameter -; 
CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20f6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x20b4:0x2e DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x20fc:0x2e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7167,14 +7232,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 165 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20dc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x211f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2124:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x20e2:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x212a:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7193,12 +7258,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 219 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20fa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2142:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2100:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 
0x2148:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7219,12 +7284,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 32 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x211a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2162:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2120:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2168:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7245,12 +7310,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 210 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x213a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2182:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2140:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2188:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7269,12 +7334,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 200 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2158:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; 
CHECK-NEXT:.b8 7 // Abbrev [7] 0x21a0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x215e:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x21a6:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7295,12 +7360,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 14 // DW_AT_decl_file ; CHECK-NEXT:.b8 145 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2178:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21c0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x217e:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x21c6:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7319,12 +7384,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 14 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2196:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x219c:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x21e4:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7347,12 +7412,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 105 // 
DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21b8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2200:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x21be:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2206:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7373,12 +7438,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 14 // DW_AT_decl_file ; CHECK-NEXT:.b8 95 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21d8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2220:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x21de:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2226:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7400,14 +7465,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 80 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21f9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21fe:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2241:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev 
[7] 0x2246:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2204:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x224c:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7430,12 +7495,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 14 // DW_AT_decl_file ; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2220:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2268:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2226:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x226e:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7456,16 +7521,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 32 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2240:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2245:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x224a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2288:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x228d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2292:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // 
DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2250:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2298:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7487,14 +7552,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 14 // DW_AT_decl_file ; CHECK-NEXT:.b8 110 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x226b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2270:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2276:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x22be:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7516,14 +7581,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 14 // DW_AT_decl_file ; CHECK-NEXT:.b8 105 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2291:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2296:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 
46 // Abbrev [46] 0x229c:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x22e4:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7545,14 +7610,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 17 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22bc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22ff:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2304:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x22c2:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x230a:0x29 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7577,14 +7642,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 7 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22e0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22e5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4630 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2328:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x232d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4702 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x22eb:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // 
Abbrev [48] 0x2333:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7608,14 +7673,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 110 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2308:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x230d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2350:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2355:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2313:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x235b:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7638,12 +7703,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x232f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2377:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2335:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x237d:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7667,14 +7732,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 240 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; 
CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2352:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2357:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x239a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x239f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x235d:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x23a5:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7699,12 +7764,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 235 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x237b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23c3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2381:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x23c9:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7729,12 +7794,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 125 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x239f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23e7:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x23a5:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x23ed:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7761,12 +7826,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 66 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3842 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23c5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x240d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x23cb:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2413:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7789,12 +7854,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 76 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23e7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x242f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x23ed:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2435:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7817,12 +7882,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 
// DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2409:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2451:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x240f:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2457:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7843,12 +7908,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2429:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2471:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x242f:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2477:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7869,12 +7934,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 90 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2449:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2491:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x244f:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2497:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7893,12 +7958,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 67 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2467:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24af:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x246d:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x24b5:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7921,12 +7986,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 116 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2489:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24d1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x248f:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x24d7:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7951,12 +8016,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 71 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24ad:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24f5:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x24b3:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x24fb:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7979,14 +8044,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 12 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24cf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24d4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2109 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2517:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x251c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2181 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x24da:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2522:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8018,12 +8083,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 130 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24ff:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2547:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2505:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x254d:0x31 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; 
CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8056,14 +8121,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 194 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x252b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2530:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2573:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2578:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2536:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x257e:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8083,14 +8148,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 47 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x254f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2554:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2597:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x259c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x255a:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x25a2:0x31 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8123,14 +8188,14 @@ if.end: ; 
preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 22 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2580:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2585:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25c8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25cd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x258b:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x25d3:0x31 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8158,16 +8223,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 27 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25ac:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4630 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25f4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25f9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25fe:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4702 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x25bc:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2604:0x20 
DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8188,12 +8253,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 111 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25d6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x261e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x25dc:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2624:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8216,12 +8281,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 61 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25f8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2640:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x25fe:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2646:0x2c DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8249,14 +8314,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 250 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x261f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 
7 // Abbrev [7] 0x2624:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2667:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x266c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5242 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x262a:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2672:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8282,14 +8347,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 245 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2649:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x264e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2691:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2696:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4657 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2654:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x269c:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8308,12 +8373,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 210 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x266c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26b4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 
2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2672:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x26ba:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8334,12 +8399,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 37 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x268c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26d4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2692:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x26da:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8360,12 +8425,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 14 // DW_AT_decl_file ; CHECK-NEXT:.b8 139 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26ac:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26f4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x26b2:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x26fa:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8384,12 +8449,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 252 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // 
DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26ca:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2712:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x26d0:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2718:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8410,12 +8475,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 42 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26ea:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2732:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x26f0:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x2738:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8440,12 +8505,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 56 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x270e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2756:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 46 // Abbrev [46] 0x2714:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 48 // Abbrev [48] 0x275c:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 
90 ; CHECK-NEXT:.b8 76 @@ -8468,10 +8533,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 14 // DW_AT_decl_file ; CHECK-NEXT:.b8 150 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2730:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2100 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2778:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2172 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT: } diff --git a/llvm/test/DebugInfo/X86/DW_AT_object_pointer.ll b/llvm/test/DebugInfo/X86/DW_AT_object_pointer.ll index d9988ac31451e..30d4203466766 100644 --- a/llvm/test/DebugInfo/X86/DW_AT_object_pointer.ll +++ b/llvm/test/DebugInfo/X86/DW_AT_object_pointer.ll @@ -5,7 +5,15 @@ ; CHECK-NOT: "" ; CHECK: DW_TAG ; CHECK: DW_TAG_class_type -; CHECK: DW_AT_object_pointer [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[PARAM:0x[0-9a-f]*]]}) +; CHECK: [[DECL:0x[0-9a-f]+]]: DW_TAG_subprogram +; CHECK: DW_AT_name {{.*}} "A" +; CHECK: DW_AT_object_pointer [DW_FORM_ref4] +; CHECK-SAME: (cu + 0x{{[0-9a-f]*}} => {[[DECL_PARAM:0x[0-9a-f]*]]}) +; CHECK: [[DECL_PARAM]]: DW_TAG_formal_parameter +; +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_specification [DW_FORM_ref4] (cu + {{.*}} => {[[DECL]]} +; CHECK: DW_AT_object_pointer [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[PARAM:0x[0-9a-f]*]]}) ; CHECK: [[PARAM]]: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "this") diff --git a/llvm/test/DebugInfo/X86/dwarf-public-names.ll b/llvm/test/DebugInfo/X86/dwarf-public-names.ll index c2274511d4191..a484c094892d0 100644 --- a/llvm/test/DebugInfo/X86/dwarf-public-names.ll +++ b/llvm/test/DebugInfo/X86/dwarf-public-names.ll @@ -61,7 +61,7 @@ ; Skip the output to the header of the pubnames section. 
; LINUX: debug_pubnames -; LINUX-NEXT: unit_size = 0x00000128 +; LINUX-NEXT: unit_size = ; Check for each name in the output. ; LINUX-DAG: "ns" diff --git a/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll b/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll index a454bf14c3353..9f3a00df2ffe7 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll @@ -55,8 +55,8 @@ ; CHECK: "#bytes within functions": [[FUNCSIZE:[0-9]+]] ; CHECK: "#bytes within inlined functions": [[INLINESIZE:[0-9]+]] ; CHECK: "#bytes in __debug_loc": 35, -; CHECK-NEXT: "#bytes in __debug_abbrev": 384, -; CHECK-NEXT: "#bytes in __debug_info": 459, +; CHECK-NEXT: "#bytes in __debug_abbrev": 386, +; CHECK-NEXT: "#bytes in __debug_info": 463, ; CHECK-NEXT: "#bytes in __debug_str": 231, ; CHECK-NEXT: "#bytes in __apple_names": 348, ; CHECK-NEXT: "#bytes in __apple_objc": 36, From c9f72b2873d2b3ea777c3ee512696f2259252bce Mon Sep 17 00:00:00 2001 From: Keyi Zhang Date: Fri, 17 Jan 2025 08:01:39 -0800 Subject: [PATCH 26/88] [MLIR][LLVM] Fix #llvm.constant_range parsing (#123009) When `APInt` parses negative numbers, it may extend the bit width. This patch ensures the bit width matches with the attribute. Fixes https://github.com/llvm/llvm-project/issues/122996. --- mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp | 8 +++----- mlir/test/Dialect/LLVMIR/func.mlir | 6 ++++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp index ff1636bc121b6..e4f9d6f987401 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp @@ -270,11 +270,9 @@ Attribute ConstantRangeAttr::parse(AsmParser &parser, Type odsType) { if (parser.parseInteger(lower) || parser.parseComma() || parser.parseInteger(upper) || parser.parseGreater()) return Attribute{}; - // For some reason, 0 is always parsed as 64-bits, fix that if needed. 
- if (lower.isZero()) - lower = lower.sextOrTrunc(bitWidth); - if (upper.isZero()) - upper = upper.sextOrTrunc(bitWidth); + // Non-positive numbers may use more bits than `bitWidth` + lower = lower.sextOrTrunc(bitWidth); + upper = upper.sextOrTrunc(bitWidth); return parser.getChecked(loc, parser.getContext(), lower, upper); } diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir index e2a444c1faaba..74dd862ce8fb2 100644 --- a/mlir/test/Dialect/LLVMIR/func.mlir +++ b/mlir/test/Dialect/LLVMIR/func.mlir @@ -479,3 +479,9 @@ llvm.func @intel_reqd_sub_group_size_hint() attributes {llvm.intel_reqd_sub_grou // CHECK-SAME: llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<512 : i64, i32> // CHECK-SAME: llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<128 : i64, !llvm.struct<(i32, i64, f32)> llvm.func @workgroup_attribution(%arg0: !llvm.ptr {llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<512 : i64, i32>}, %arg1: !llvm.ptr {llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<128 : i64, !llvm.struct<(i32, i64, f32)>>}) + +// ----- + +// CHECK: @constant_range_negative +// CHECK-SAME: llvm.range = #llvm.constant_range +llvm.func @constant_range_negative() -> (i32 {llvm.range = #llvm.constant_range}) From 71ad9a958ac192599a46d86724a8084c8c4aff1c Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Fri, 17 Jan 2025 08:06:49 -0800 Subject: [PATCH 27/88] [fuchsia][cmake] Add runtimes for cortex-m4 for the Fuchsia toolchain (#123258) --- clang/cmake/caches/Fuchsia-stage2.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index e799900094df3..1cbf691f29d58 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -300,7 +300,7 @@ if(FUCHSIA_SDK) set(LLVM_RUNTIME_MULTILIB_hwasan+noexcept_TARGETS "aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") 
endif() -foreach(target armv6m-none-eabi;armv7m-none-eabi;armv8m.main-none-eabi;armv8.1m.main-none-eabi;aarch64-none-elf) +foreach(target armv6m-none-eabi;armv7m-none-eabi;armv7em-none-eabi;armv8m.main-none-eabi;armv8.1m.main-none-eabi;aarch64-none-elf) list(APPEND BUILTIN_TARGETS "${target}") set(BUILTINS_${target}_CMAKE_SYSTEM_NAME Generic CACHE STRING "") set(BUILTINS_${target}_CMAKE_SYSTEM_PROCESSOR arm CACHE STRING "") From 89305c371552adba2bd10394d2c645c9792840b2 Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Fri, 17 Jan 2025 11:14:50 -0500 Subject: [PATCH 28/88] [z/OS] add tail padding to TypeLoc if needed (#122761) The code in getLocalDataSize() returns the sum of the size of the LocalData plus the size of the extra data. The start of the extra data is padded so it starts on a multiple of its alignment. We also need to be adding tail padding so the final size is a multiple of the alignment of the LocalData. On most systems the alignment of the extra data is the same or greater than the alignment of the LocalData so you don't need the tail padding. However, on z/OS, the alignment of the extra data is less than the alignment of the LocalData and thus you do need the tail padding to make the final size a multiple of the LocalData alignment. The extra data is the WrittenBuiltinSpecs struct. This struct is just a struct of bitfields. On most systems the alignment of the struct is determined by the type of the bitfields (e.g. unsigned int -> align of 4). On z/OS, all bitfields are 1 byte aligned. Thus on z/OS WrittenBuiltinSpecs is only size 2 with alignment of 1 (versus 4 & 4).
--- clang/include/clang/AST/TypeLoc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h index 62ca52e508ba2..a55a38335ef6a 100644 --- a/clang/include/clang/AST/TypeLoc.h +++ b/clang/include/clang/AST/TypeLoc.h @@ -397,6 +397,7 @@ class ConcreteTypeLoc : public Base { unsigned extraAlign = asDerived()->getExtraLocalDataAlignment(); size = llvm::alignTo(size, extraAlign); size += asDerived()->getExtraLocalDataSize(); + size = llvm::alignTo(size, asDerived()->getLocalDataAlignment()); return size; } From 9f627cf540e9f0da1be17581af8d162a78820e6f Mon Sep 17 00:00:00 2001 From: Prashanth Date: Fri, 17 Jan 2025 22:08:03 +0530 Subject: [PATCH 29/88] [libc][docs] Add sys/time page to the status of implementations docs (#123000) These changes ensure that the sys/time header is documented properly with respect to the issue ( #122006 ) . --- libc/docs/CMakeLists.txt | 1 + libc/docs/headers/index.rst | 1 + libc/utils/docgen/sys/time.yaml | 5 +++++ 3 files changed, 7 insertions(+) create mode 100644 libc/utils/docgen/sys/time.yaml diff --git a/libc/docs/CMakeLists.txt b/libc/docs/CMakeLists.txt index e77e979ab1e83..eab79ed70a61b 100644 --- a/libc/docs/CMakeLists.txt +++ b/libc/docs/CMakeLists.txt @@ -55,6 +55,7 @@ if (SPHINX_FOUND) strings sys/mman sys/resource + sys/time sys/wait threads uchar diff --git a/libc/docs/headers/index.rst b/libc/docs/headers/index.rst index 5dbdcbc80529d..63667f0a38997 100644 --- a/libc/docs/headers/index.rst +++ b/libc/docs/headers/index.rst @@ -28,6 +28,7 @@ Implementation Status strings sys/mman sys/resource + sys/time sys/wait threads time diff --git a/libc/utils/docgen/sys/time.yaml b/libc/utils/docgen/sys/time.yaml new file mode 100644 index 0000000000000..a1d19c3fc636c --- /dev/null +++ b/libc/utils/docgen/sys/time.yaml @@ -0,0 +1,5 @@ +functions: + select: + in-latest-posix: '' + utimes: + in-latest-posix: '' \ No newline at end of file From 
3f07af93dc013621176f5931ebc8dd07d299b277 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 17 Jan 2025 08:46:16 -0800 Subject: [PATCH 30/88] [AST] Migrate away from PointerUnion::dyn_cast (NFC) (#123283) Note that PointerUnion::dyn_cast has been soft deprecated in PointerUnion.h: // FIXME: Replace the uses of is(), get() and dyn_cast() with // isa, cast and the llvm::dyn_cast Literal migration would result in dyn_cast_if_present (see the definition of PointerUnion::dyn_cast), but this patch uses dyn_cast because we expect Source to be nonnull. --- clang/lib/AST/ByteCode/EvaluationResult.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ByteCode/EvaluationResult.cpp b/clang/lib/AST/ByteCode/EvaluationResult.cpp index 0ce8f15ea9127..d603e08c7bb4d 100644 --- a/clang/lib/AST/ByteCode/EvaluationResult.cpp +++ b/clang/lib/AST/ByteCode/EvaluationResult.cpp @@ -160,9 +160,9 @@ bool EvaluationResult::checkFullyInitialized(InterpState &S, return true; SourceLocation InitLoc; - if (const auto *D = Source.dyn_cast()) + if (const auto *D = dyn_cast(Source)) InitLoc = cast(D)->getAnyInitializer()->getExprLoc(); - else if (const auto *E = Source.dyn_cast()) + else if (const auto *E = dyn_cast(Source)) InitLoc = E->getExprLoc(); if (const Record *R = Ptr.getRecord()) From f5736aee112177b8b4620ab55bb65152a652dd34 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 17 Jan 2025 08:46:52 -0800 Subject: [PATCH 31/88] [Sema] Migrate away from PointerUnion::dyn_cast (NFC) (#123284) Note that PointerUnion::dyn_cast has been soft deprecated in PointerUnion.h: // FIXME: Replace the uses of is(), get() and dyn_cast() with // isa, cast and the llvm::dyn_cast Literal migration would result in dyn_cast_if_present (see the definition of PointerUnion::dyn_cast), but this patch uses dyn_cast because we expect Stored to be nonnull. 
--- clang/lib/Sema/SemaTemplateInstantiate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index fb0f38df62a74..839c4e8a28220 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -4661,7 +4661,7 @@ void LocalInstantiationScope::InstantiatedLocal(const Decl *D, Decl *Inst) { } #endif Stored = Inst; - } else if (DeclArgumentPack *Pack = Stored.dyn_cast()) { + } else if (DeclArgumentPack *Pack = dyn_cast(Stored)) { Pack->push_back(cast(Inst)); } else { assert(cast(Stored) == Inst && "Already instantiated this local"); From d5ef2c054d26d0ad2ea8022060d5140f4b663801 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 17 Jan 2025 08:47:15 -0800 Subject: [PATCH 32/88] [AST] Avoid repeated hash lookups (NFC) (#123285) --- clang/lib/AST/VTableBuilder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/VTableBuilder.cpp b/clang/lib/AST/VTableBuilder.cpp index e941c3bedb0a7..fa3055dd1206f 100644 --- a/clang/lib/AST/VTableBuilder.cpp +++ b/clang/lib/AST/VTableBuilder.cpp @@ -3831,8 +3831,8 @@ const VirtualBaseInfo &MicrosoftVTableContext::computeVBTableRelatedInformation( unsigned VBTableIndex = 1 + VBI->VBTableIndices.size(); for (const auto &VB : RD->vbases()) { const CXXRecordDecl *CurVBase = VB.getType()->getAsCXXRecordDecl(); - if (!VBI->VBTableIndices.count(CurVBase)) - VBI->VBTableIndices[CurVBase] = VBTableIndex++; + if (VBI->VBTableIndices.try_emplace(CurVBase, VBTableIndex).second) + ++VBTableIndex; } return *VBI; From d5aa6dfe8caeaadb7a8b4c89614e6b980a49ff7d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 17 Jan 2025 08:47:38 -0800 Subject: [PATCH 33/88] [Analysis] Avoid repeated hash lookups (NFC) (#123286) --- llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git 
a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 4aa922635c374..7a7a9594f4760 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -1409,11 +1409,10 @@ template void BlockFrequencyInfoImpl::applyIterativeInference() { auto Node = getNode(&BB); if (!Node.isValid()) continue; - if (BlockIndex.count(&BB)) { - Freqs[Node.Index].Scaled = Freq[BlockIndex[&BB]]; - } else { + if (auto It = BlockIndex.find(&BB); It != BlockIndex.end()) + Freqs[Node.Index].Scaled = Freq[It->second]; + else Freqs[Node.Index].Scaled = Scaled64::getZero(); - } } } @@ -1764,8 +1763,8 @@ void BlockFrequencyInfoImpl::verifyMatch( for (auto &Entry : ValidNodes) { const BlockT *BB = Entry.first; BlockNode Node = Entry.second; - if (OtherValidNodes.count(BB)) { - BlockNode OtherNode = OtherValidNodes[BB]; + if (auto It = OtherValidNodes.find(BB); It != OtherValidNodes.end()) { + BlockNode OtherNode = It->second; const auto &Freq = Freqs[Node.Index]; const auto &OtherFreq = Other.Freqs[OtherNode.Index]; if (Freq.Integer != OtherFreq.Integer) { From c5312553cb7a49b53ba2bac40fbc3c1745855844 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 17 Jan 2025 08:48:06 -0800 Subject: [PATCH 34/88] [CodeGen] Avoid repeated hash lookups (NFC) (#123287) --- llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index a465f52bfd593..dbc724629d3be 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -230,9 +230,10 @@ void FunctionVarLocs::init(FunctionVarLocsBuilder &Builder) { for (const DbgVariableRecord &DVR : filterDbgVars(I->getDbgRecordRange())) { // Even though DVR defines a variable location, VarLocsBeforeInst can // still be empty if that VarLoc 
was redundant. - if (!Builder.VarLocsBeforeInst.count(&DVR)) + auto It = Builder.VarLocsBeforeInst.find(&DVR); + if (It == Builder.VarLocsBeforeInst.end()) continue; - for (const VarLocInfo &VarLoc : Builder.VarLocsBeforeInst[&DVR]) + for (const VarLocInfo &VarLoc : It->second) VarLocRecords.emplace_back(VarLoc); } for (const VarLocInfo &VarLoc : P.second) From f719771f251d7c30eca448133fe85730f19a6bd1 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Fri, 17 Jan 2025 16:53:33 +0000 Subject: [PATCH 35/88] Revert "[AArch64] Combine and and lsl into ubfiz" (#123356) Reverts llvm/llvm-project#118974 --- .../Target/AArch64/AArch64ISelLowering.cpp | 41 ------- .../AArch64/const-shift-of-constmasked.ll | 101 ++++++++++-------- llvm/test/CodeGen/AArch64/extract-bits.ll | 16 +-- llvm/test/CodeGen/AArch64/fpenv.ll | 6 +- llvm/test/CodeGen/AArch64/xbfiz.ll | 16 --- 5 files changed, 65 insertions(+), 115 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7d3ca46204b67..0c096711bf3bd 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1140,8 +1140,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); - setTargetDAGCombine(ISD::SHL); - // In case of strict alignment, avoid an excessive number of byte wide stores. MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemset = @@ -26474,43 +26472,6 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return NVCAST; } -/// If the operand is a bitwise AND with a constant RHS, and the shift has a -/// constant RHS and is the only use, we can pull it out of the shift, i.e. -/// -/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) -/// -/// We prefer this canonical form to match existing isel patterns. 
-static SDValue performSHLCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - SDValue Op0 = N->getOperand(0); - if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse()) - return SDValue(); - - SDValue C1 = Op0->getOperand(1); - SDValue C2 = N->getOperand(1); - if (!isa(C1) || !isa(C2)) - return SDValue(); - - // Might be folded into shifted op, do not lower. - if (N->hasOneUse()) { - unsigned UseOpc = N->user_begin()->getOpcode(); - if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || - UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS) - return SDValue(); - } - - SDLoc DL(N); - EVT VT = N->getValueType(0); - SDValue X = Op0->getOperand(0); - SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); - SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); - return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); -} - SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -26856,8 +26817,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performCTLZCombine(N, DAG, Subtarget); case ISD::SCALAR_TO_VECTOR: return performScalarToVectorCombine(N, DCI, DAG); - case ISD::SHL: - return performSHLCombine(N, DCI, DAG); } return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll index 1fffcdda4b416..66a6745cda8f7 100644 --- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll @@ -190,7 +190,8 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) { define i8 @test_i8_7_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_7_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfiz w0, w0, #1, #3 +; CHECK-NEXT: and w8, w0, #0x7 +; CHECK-NEXT: lsl w0, w8, #1 ; CHECK-NEXT: ret %t0 = and i8 %a0, 7 %t1 = shl i8 %t0, 1 @@ -199,7 +200,8 @@ define i8 
@test_i8_7_mask_shl_1(i8 %a0) { define i8 @test_i8_7_mask_shl_4(i8 %a0) { ; CHECK-LABEL: test_i8_7_mask_shl_4: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfiz w0, w0, #4, #3 +; CHECK-NEXT: and w8, w0, #0x7 +; CHECK-NEXT: lsl w0, w8, #4 ; CHECK-NEXT: ret %t0 = and i8 %a0, 7 %t1 = shl i8 %t0, 4 @@ -227,8 +229,8 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) { define i8 @test_i8_28_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #1 -; CHECK-NEXT: and w0, w8, #0x38 +; CHECK-NEXT: and w8, w0, #0x1c +; CHECK-NEXT: lsl w0, w8, #1 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 1 @@ -237,8 +239,8 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) { define i8 @test_i8_28_mask_shl_2(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_2: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #2 -; CHECK-NEXT: and w0, w8, #0x70 +; CHECK-NEXT: and w8, w0, #0x1c +; CHECK-NEXT: lsl w0, w8, #2 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 2 @@ -247,8 +249,8 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) { define i8 @test_i8_28_mask_shl_3(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_3: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #3 -; CHECK-NEXT: and w0, w8, #0xe0 +; CHECK-NEXT: and w8, w0, #0x1c +; CHECK-NEXT: lsl w0, w8, #3 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 3 @@ -257,8 +259,8 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) { define i8 @test_i8_28_mask_shl_4(i8 %a0) { ; CHECK-LABEL: test_i8_28_mask_shl_4: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #4 -; CHECK-NEXT: and w0, w8, #0xc0 +; CHECK-NEXT: and w8, w0, #0xc +; CHECK-NEXT: lsl w0, w8, #4 ; CHECK-NEXT: ret %t0 = and i8 %a0, 28 %t1 = shl i8 %t0, 4 @@ -268,8 +270,8 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) { define i8 @test_i8_224_mask_shl_1(i8 %a0) { ; CHECK-LABEL: test_i8_224_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #1 -; CHECK-NEXT: and w0, w8, #0xc0 +; CHECK-NEXT: and w8, w0, #0x60 +; CHECK-NEXT: lsl w0, w8, #1 ; CHECK-NEXT: ret %t0 = and i8 
%a0, 224 %t1 = shl i8 %t0, 1 @@ -463,7 +465,8 @@ define i16 @test_i16_65024_mask_ashr_10(i16 %a0) { define i16 @test_i16_127_mask_shl_1(i16 %a0) { ; CHECK-LABEL: test_i16_127_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfiz w0, w0, #1, #7 +; CHECK-NEXT: and w8, w0, #0x7f +; CHECK-NEXT: lsl w0, w8, #1 ; CHECK-NEXT: ret %t0 = and i16 %a0, 127 %t1 = shl i16 %t0, 1 @@ -472,7 +475,8 @@ define i16 @test_i16_127_mask_shl_1(i16 %a0) { define i16 @test_i16_127_mask_shl_8(i16 %a0) { ; CHECK-LABEL: test_i16_127_mask_shl_8: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfiz w0, w0, #8, #7 +; CHECK-NEXT: and w8, w0, #0x7f +; CHECK-NEXT: lsl w0, w8, #8 ; CHECK-NEXT: ret %t0 = and i16 %a0, 127 %t1 = shl i16 %t0, 8 @@ -500,8 +504,8 @@ define i16 @test_i16_127_mask_shl_10(i16 %a0) { define i16 @test_i16_2032_mask_shl_3(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_3: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #3 -; CHECK-NEXT: and w0, w8, #0x3f80 +; CHECK-NEXT: and w8, w0, #0x7f0 +; CHECK-NEXT: lsl w0, w8, #3 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 3 @@ -510,8 +514,8 @@ define i16 @test_i16_2032_mask_shl_3(i16 %a0) { define i16 @test_i16_2032_mask_shl_4(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_4: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #4 -; CHECK-NEXT: and w0, w8, #0x7f00 +; CHECK-NEXT: and w8, w0, #0x7f0 +; CHECK-NEXT: lsl w0, w8, #4 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 4 @@ -520,8 +524,8 @@ define i16 @test_i16_2032_mask_shl_4(i16 %a0) { define i16 @test_i16_2032_mask_shl_5(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_5: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #5 -; CHECK-NEXT: and w0, w8, #0xfe00 +; CHECK-NEXT: and w8, w0, #0x7f0 +; CHECK-NEXT: lsl w0, w8, #5 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 5 @@ -530,8 +534,8 @@ define i16 @test_i16_2032_mask_shl_5(i16 %a0) { define i16 @test_i16_2032_mask_shl_6(i16 %a0) { ; CHECK-LABEL: test_i16_2032_mask_shl_6: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl 
w8, w0, #6 -; CHECK-NEXT: and w0, w8, #0xfc00 +; CHECK-NEXT: and w8, w0, #0x3f0 +; CHECK-NEXT: lsl w0, w8, #6 ; CHECK-NEXT: ret %t0 = and i16 %a0, 2032 %t1 = shl i16 %t0, 6 @@ -541,8 +545,8 @@ define i16 @test_i16_2032_mask_shl_6(i16 %a0) { define i16 @test_i16_65024_mask_shl_1(i16 %a0) { ; CHECK-LABEL: test_i16_65024_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #1 -; CHECK-NEXT: and w0, w8, #0xfc00 +; CHECK-NEXT: and w8, w0, #0x7e00 +; CHECK-NEXT: lsl w0, w8, #1 ; CHECK-NEXT: ret %t0 = and i16 %a0, 65024 %t1 = shl i16 %t0, 1 @@ -736,7 +740,8 @@ define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) { define i32 @test_i32_32767_mask_shl_1(i32 %a0) { ; CHECK-LABEL: test_i32_32767_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfiz w0, w0, #1, #15 +; CHECK-NEXT: and w8, w0, #0x7fff +; CHECK-NEXT: lsl w0, w8, #1 ; CHECK-NEXT: ret %t0 = and i32 %a0, 32767 %t1 = shl i32 %t0, 1 @@ -745,7 +750,8 @@ define i32 @test_i32_32767_mask_shl_1(i32 %a0) { define i32 @test_i32_32767_mask_shl_16(i32 %a0) { ; CHECK-LABEL: test_i32_32767_mask_shl_16: ; CHECK: // %bb.0: -; CHECK-NEXT: ubfiz w0, w0, #16, #15 +; CHECK-NEXT: and w8, w0, #0x7fff +; CHECK-NEXT: lsl w0, w8, #16 ; CHECK-NEXT: ret %t0 = and i32 %a0, 32767 %t1 = shl i32 %t0, 16 @@ -773,8 +779,8 @@ define i32 @test_i32_32767_mask_shl_18(i32 %a0) { define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_7: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #7 -; CHECK-NEXT: and w0, w8, #0x3fff8000 +; CHECK-NEXT: and w8, w0, #0x7fff00 +; CHECK-NEXT: lsl w0, w8, #7 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 7 @@ -783,8 +789,8 @@ define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_8: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #8 -; CHECK-NEXT: and w0, w8, #0x7fff0000 +; CHECK-NEXT: and w8, w0, #0x7fff00 +; CHECK-NEXT: lsl w0, w8, #8 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 
%t1 = shl i32 %t0, 8 @@ -793,8 +799,8 @@ define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_9: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #9 -; CHECK-NEXT: and w0, w8, #0xfffe0000 +; CHECK-NEXT: and w8, w0, #0x7fff00 +; CHECK-NEXT: lsl w0, w8, #9 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 9 @@ -803,8 +809,8 @@ define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { ; CHECK-LABEL: test_i32_8388352_mask_shl_10: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #10 -; CHECK-NEXT: and w0, w8, #0xfffc0000 +; CHECK-NEXT: and w8, w0, #0x3fff00 +; CHECK-NEXT: lsl w0, w8, #10 ; CHECK-NEXT: ret %t0 = and i32 %a0, 8388352 %t1 = shl i32 %t0, 10 @@ -814,8 +820,8 @@ define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) { ; CHECK-LABEL: test_i32_4294836224_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #1 -; CHECK-NEXT: and w0, w8, #0xfffc0000 +; CHECK-NEXT: and w8, w0, #0x7ffe0000 +; CHECK-NEXT: lsl w0, w8, #1 ; CHECK-NEXT: ret %t0 = and i32 %a0, 4294836224 %t1 = shl i32 %t0, 1 @@ -1009,7 +1015,8 @@ define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) { define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { ; CHECK-LABEL: test_i64_2147483647_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w0, w0, #1 +; CHECK-NEXT: and x8, x0, #0x7fffffff +; CHECK-NEXT: lsl x0, x8, #1 ; CHECK-NEXT: ret %t0 = and i64 %a0, 2147483647 %t1 = shl i64 %t0, 1 @@ -1047,8 +1054,8 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_15: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl x8, x0, #15 -; CHECK-NEXT: and x0, x8, #0x3fffffff80000000 +; CHECK-NEXT: and x8, x0, #0x7fffffff0000 +; CHECK-NEXT: lsl x0, x8, #15 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl 
i64 %t0, 15 @@ -1057,8 +1064,8 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_16: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl x8, x0, #16 -; CHECK-NEXT: and x0, x8, #0x7fffffff00000000 +; CHECK-NEXT: and x8, x0, #0x7fffffff0000 +; CHECK-NEXT: lsl x0, x8, #16 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl i64 %t0, 16 @@ -1067,8 +1074,8 @@ define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_17: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl x8, x0, #17 -; CHECK-NEXT: and x0, x8, #0xfffffffe00000000 +; CHECK-NEXT: and x8, x0, #0x7fffffff0000 +; CHECK-NEXT: lsl x0, x8, #17 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl i64 %t0, 17 @@ -1077,8 +1084,8 @@ define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { ; CHECK-LABEL: test_i64_140737488289792_mask_shl_18: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl x8, x0, #18 -; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 +; CHECK-NEXT: and x8, x0, #0x3fffffff0000 +; CHECK-NEXT: lsl x0, x8, #18 ; CHECK-NEXT: ret %t0 = and i64 %a0, 140737488289792 %t1 = shl i64 %t0, 18 @@ -1088,8 +1095,8 @@ define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) { ; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl x8, x0, #1 -; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 +; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000 +; CHECK-NEXT: lsl x0, x8, #1 ; CHECK-NEXT: ret %t0 = and i64 %a0, 18446744065119617024 %t1 = shl i64 %t0, 1 diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll index aaa6c7eb4a30f..b87157a183835 100644 --- a/llvm/test/CodeGen/AArch64/extract-bits.ll +++ 
b/llvm/test/CodeGen/AArch64/extract-bits.ll @@ -1013,8 +1013,8 @@ define i32 @c1_i32(i32 %arg) nounwind { define i32 @c2_i32(i32 %arg) nounwind { ; CHECK-LABEL: c2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, #17 -; CHECK-NEXT: and w0, w8, #0xffc +; CHECK-NEXT: ubfx w8, w0, #19, #10 +; CHECK-NEXT: lsl w0, w8, #2 ; CHECK-NEXT: ret %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 1023 @@ -1063,8 +1063,8 @@ define i64 @c1_i64(i64 %arg) nounwind { define i64 @c2_i64(i64 %arg) nounwind { ; CHECK-LABEL: c2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x8, x0, #49 -; CHECK-NEXT: and x0, x8, #0xffc +; CHECK-NEXT: ubfx x8, x0, #51, #10 +; CHECK-NEXT: lsl x0, x8, #2 ; CHECK-NEXT: ret %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 1023 @@ -1120,8 +1120,8 @@ define void @c6_i32(i32 %arg, ptr %ptr) nounwind { define void @c7_i32(i32 %arg, ptr %ptr) nounwind { ; CHECK-LABEL: c7_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, #17 -; CHECK-NEXT: and w8, w8, #0xffc +; CHECK-NEXT: ubfx w8, w0, #19, #10 +; CHECK-NEXT: lsl w8, w8, #2 ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %tmp0 = lshr i32 %arg, 19 @@ -1163,8 +1163,8 @@ define void @c6_i64(i64 %arg, ptr %ptr) nounwind { define void @c7_i64(i64 %arg, ptr %ptr) nounwind { ; CHECK-LABEL: c7_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x8, x0, #49 -; CHECK-NEXT: and x8, x8, #0xffc +; CHECK-NEXT: ubfx x8, x0, #51, #10 +; CHECK-NEXT: lsl x8, x8, #2 ; CHECK-NEXT: str x8, [x1] ; CHECK-NEXT: ret %tmp0 = lshr i64 %arg, 51 diff --git a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll index 3351565d8dd89..3a307f7731037 100644 --- a/llvm/test/CodeGen/AArch64/fpenv.ll +++ b/llvm/test/CodeGen/AArch64/fpenv.ll @@ -4,11 +4,11 @@ define void @func_set_rounding_dyn(i32 %rm) { ; CHECK-LABEL: func_set_rounding_dyn: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w9, w0, #22 +; CHECK-NEXT: sub w9, w0, #1 ; CHECK-NEXT: mrs x8, FPCR +; CHECK-NEXT: and w9, w9, #0x3 ; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff -; CHECK-NEXT: sub w9, 
w9, #1024, lsl #12 // =4194304 -; CHECK-NEXT: and w9, w9, #0xc00000 +; CHECK-NEXT: lsl w9, w9, #22 ; CHECK-NEXT: orr x8, x8, x9 ; CHECK-NEXT: msr FPCR, x8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll index 05567e3425840..b777ddcb7efcc 100644 --- a/llvm/test/CodeGen/AArch64/xbfiz.ll +++ b/llvm/test/CodeGen/AArch64/xbfiz.ll @@ -69,19 +69,3 @@ define i64 @lsl32_not_ubfiz64(i64 %v) { %and = and i64 %shl, 4294967295 ret i64 %and } - -define i64 @lsl_zext_i8_i64(i8 %b) { -; CHECK-LABEL: lsl_zext_i8_i64: -; CHECK: ubfiz x0, x0, #1, #8 - %1 = zext i8 %b to i64 - %2 = shl i64 %1, 1 - ret i64 %2 -} - -define i64 @lsl_zext_i16_i64(i16 %b) { -; CHECK-LABEL: lsl_zext_i16_i64: -; CHECK: ubfiz x0, x0, #1, #16 - %1 = zext i16 %b to i64 - %2 = shl i64 %1, 1 - ret i64 %2 -} From d54d8d7e5a47d576d46dbc9fe60a0c927d05a648 Mon Sep 17 00:00:00 2001 From: Prashanth Date: Fri, 17 Jan 2025 22:29:49 +0530 Subject: [PATCH 36/88] [libc][docs] Add termios page to the status of implementations docs (#123004) These changes ensure that the termios header is documented properly with respect to the issue (https://github.com/llvm/llvm-project/issues/122006).
--- libc/docs/CMakeLists.txt | 1 + libc/docs/headers/index.rst | 1 + libc/utils/docgen/termios.yaml | 243 +++++++++++++++++++++++++++++++++ 3 files changed, 245 insertions(+) create mode 100644 libc/utils/docgen/termios.yaml diff --git a/libc/docs/CMakeLists.txt b/libc/docs/CMakeLists.txt index eab79ed70a61b..9974769ca7eb3 100644 --- a/libc/docs/CMakeLists.txt +++ b/libc/docs/CMakeLists.txt @@ -57,6 +57,7 @@ if (SPHINX_FOUND) sys/resource sys/time sys/wait + termios threads uchar wchar diff --git a/libc/docs/headers/index.rst b/libc/docs/headers/index.rst index 63667f0a38997..64f84ef09cc35 100644 --- a/libc/docs/headers/index.rst +++ b/libc/docs/headers/index.rst @@ -30,6 +30,7 @@ Implementation Status sys/resource sys/time sys/wait + termios threads time uchar diff --git a/libc/utils/docgen/termios.yaml b/libc/utils/docgen/termios.yaml new file mode 100644 index 0000000000000..81dd8da9f240c --- /dev/null +++ b/libc/utils/docgen/termios.yaml @@ -0,0 +1,243 @@ +macros: + NCCS: + in-latest-posix: '' + + VEOF: + in-latest-posix: '' + VEOL: + in-latest-posix: '' + VERASE: + in-latest-posix: '' + VINTR: + in-latest-posix: '' + VKILL: + in-latest-posix: '' + VMIN: + in-latest-posix: '' + VQUIT: + in-latest-posix: '' + VSTART: + in-latest-posix: '' + VSTOP: + in-latest-posix: '' + VSUSP: + in-latest-posix: '' + VTIME: + in-latest-posix: '' + + BRKINT: + in-latest-posix: '' + ICRNL: + in-latest-posix: '' + IGNBRK: + in-latest-posix: '' + IGNCR: + in-latest-posix: '' + IGNPAR: + in-latest-posix: '' + INLCR: + in-latest-posix: '' + INPCK: + in-latest-posix: '' + ISTRIP: + in-latest-posix: '' + IXANY: + in-latest-posix: '' + IXOFF: + in-latest-posix: '' + IXON: + in-latest-posix: '' + PARMRK: + in-latest-posix: '' + + OPOST: + in-latest-posix: '' + ONLCR: + in-latest-posix: '' + OCRNL: + in-latest-posix: '' + ONOCR: + in-latest-posix: '' + ONLRET: + in-latest-posix: '' + OFDEL: + in-latest-posix: '' + OFILL: + in-latest-posix: '' + + NLDLY: + in-latest-posix: '' + NL0: + 
in-latest-posix: '' + NL1: + in-latest-posix: '' + + CRDLY: + in-latest-posix: '' + CR0: + in-latest-posix: '' + CR1: + in-latest-posix: '' + CR2: + in-latest-posix: '' + CR3: + in-latest-posix: '' + + TABDLY: + in-latest-posix: '' + TAB0: + in-latest-posix: '' + TAB1: + in-latest-posix: '' + TAB2: + in-latest-posix: '' + TAB3: + in-latest-posix: '' + + BSDLY: + in-latest-posix: '' + BS0: + in-latest-posix: '' + BS1: + in-latest-posix: '' + + VTDLY: + in-latest-posix: '' + VT0: + in-latest-posix: '' + VT1: + in-latest-posix: '' + + FFDLY: + in-latest-posix: '' + FF0: + in-latest-posix: '' + FF1: + in-latest-posix: '' + + B0: + in-latest-posix: '' + B50: + in-latest-posix: '' + B75: + in-latest-posix: '' + B110: + in-latest-posix: '' + B134: + in-latest-posix: '' + B150: + in-latest-posix: '' + B200: + in-latest-posix: '' + B300: + in-latest-posix: '' + B600: + in-latest-posix: '' + B1200: + in-latest-posix: '' + B1800: + in-latest-posix: '' + B2400: + in-latest-posix: '' + B4800: + in-latest-posix: '' + B9600: + in-latest-posix: '' + B19200: + in-latest-posix: '' + B38400: + in-latest-posix: '' + + CSIZE: + in-latest-posix: '' + CS5: + in-latest-posix: '' + CS6: + in-latest-posix: '' + CS7: + in-latest-posix: '' + CS8: + in-latest-posix: '' + + CSTOPB: + in-latest-posix: '' + CREAD: + in-latest-posix: '' + PARENB: + in-latest-posix: '' + PARODD: + in-latest-posix: '' + HUPCL: + in-latest-posix: '' + CLOCAL: + in-latest-posix: '' + + ECHO: + in-latest-posix: '' + ECHOE: + in-latest-posix: '' + ECHOK: + in-latest-posix: '' + ECHONL: + in-latest-posix: '' + ICANON: + in-latest-posix: '' + IEXTEN: + in-latest-posix: '' + ISIG: + in-latest-posix: '' + NOFLSH: + in-latest-posix: '' + TOSTOP: + in-latest-posix: '' + + TCSANOW: + in-latest-posix: '' + TCSADRAIN: + in-latest-posix: '' + TCSAFLUSH: + in-latest-posix: '' + + TCIFLUSH: + in-latest-posix: '' + TCIOFLUSH: + in-latest-posix: '' + TCOFLUSH: + in-latest-posix: '' + + TCIOFF: + in-latest-posix: '' + TCION: + 
in-latest-posix: '' + TCOOFF: + in-latest-posix: '' + TCOON: + in-latest-posix: '' + +functions: + cfgetispeed: + in-latest-posix: '' + cfgetospeed: + in-latest-posix: '' + cfsetispeed: + in-latest-posix: '' + cfsetospeed: + in-latest-posix: '' + tcdrain: + in-latest-posix: '' + tcflow: + in-latest-posix: '' + tcflush: + in-latest-posix: '' + tcgetattr: + in-latest-posix: '' + tcgetsid: + in-latest-posix: '' + tcgetwinsize: + in-latest-posix: '' + tcsendbreak: + in-latest-posix: '' + tcsetattr: + in-latest-posix: '' + tcsetwinsize: + in-latest-posix: '' \ No newline at end of file From c83e5e85a382083d13933805b994c45d412c568f Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Fri, 17 Jan 2025 18:17:21 +0100 Subject: [PATCH 37/88] [libc++] Enable _LIBCPP_NODEBUG again (#123318) `_LIBCPP_NODEBUG` has been disabled temporarily, since there were a few problems when adding a bunch of annotations throughout the code base. They have been resolved now, so we can enable all the annotations again. 
Reverts llvm/llvm-project#122393 --- libcxx/include/__config | 4 +--- libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 658a7e16fae91..5d5c90d7b87a7 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1166,9 +1166,7 @@ typedef __char32_t char32_t; # define _LIBCPP_NOESCAPE # endif -// FIXME: Expand this to [[__gnu__::__nodebug__]] again once the testcase reported in -// https://github.com/llvm/llvm-project/pull/118710 has been analyzed -# define _LIBCPP_NODEBUG +# define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]] # if __has_attribute(__standalone_debug__) # define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__)) diff --git a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp index f49f3e3c615ca..bc7c8ce7ec443 100644 --- a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +++ b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp @@ -27,7 +27,7 @@ class LibcxxTestModule : public clang::tidy::ClangTidyModule { check_factories.registerCheck("libcpp-header-exportable-declarations"); check_factories.registerCheck("libcpp-hide-from-abi"); check_factories.registerCheck("libcpp-internal-ftms"); - // check_factories.registerCheck("libcpp-nodebug-on-aliases"); + check_factories.registerCheck("libcpp-nodebug-on-aliases"); check_factories.registerCheck("libcpp-cpp-version-check"); check_factories.registerCheck("libcpp-robust-against-adl"); check_factories.registerCheck("libcpp-uglify-attributes"); From 4b692a95d103f3ad30d6be1ce6d5dda0bd90bc1f Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Fri, 17 Jan 2025 12:22:28 -0500 Subject: [PATCH 38/88] [SPIRV] Expand RWBuffer load and store from HLSL (#122355) The code pattern that clang will generate for HLSL has changed from the original plan. 
This allows the SPIR-V backend to generate code for the current code generation. It looks for patterns of the form: ``` %1 = @llvm.spv.resource.handlefrombinding %2 = @llvm.spv.resource.getpointer(%1, index) load/store %2 ``` These three LLVM IR instructions are treated as a single unit that will: 1. Generate or find the global variable identified by the call to `resource.handlefrombinding`. 2. Generate an OpLoad of the variable to get the handle to the image. 3. Generate an OpImageRead or OpImageWrite using that handle with the given index. This will generate the OpLoad in the same BB as the read/write. Note: Now that `resource.handlefrombinding` is not processed on its own, many existing tests had to be removed. We do not have intrinsics that are able to use handles to sampled images, input attachments, etc., so we cannot generate the load of the handle. These tests are removed for now, and will be added when those resource types are fully implemented. --- llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 17 +- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 9 +- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 2 +- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 169 ++++++++++++++---- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 6 +- .../SPIRV/hlsl-resources/BufferLoadStore.ll | 60 +++++++ .../SPIRV/hlsl-resources/BufferStore.ll | 3 +- .../CombinedSamplerImageDynIdx.ll | 40 ----- .../CombinedSamplerImageNonUniformIdx.ll | 47 ----- .../InputAttachmentImageDynIdx.ll | 39 ---- .../InputAttachmentImageNonUniformIdx.ll | 46 ----- .../hlsl-resources/SampledImageDynIdx.ll | 65 ------- .../SampledImageNonUniformIdx.ll | 46 ----- .../hlsl-resources/SamplerArrayDynIdx.ll | 38 ---- .../SamplerArrayNonUniformIdx.ll | 45 ----- .../hlsl-resources/ScalarResourceType.ll | 8 + .../hlsl-resources/StorageImageDynIdx.ll | 4 + .../StorageImageNonUniformIdx.ll | 4 + .../StorageTexelBufferDynIdx.ll | 39 ---- .../StorageTexelBufferNonUniformIdx.ll | 46 ----- .../UniformTexelBufferDynIdx.ll
| 39 ---- .../UniformTexelBufferNonUniformIdx.ll | 46 ----- .../hlsl-resources/UnknownBufferStore.ll | 3 +- 23 files changed, 240 insertions(+), 581 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/BufferLoadStore.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll delete mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index d2b14d6d058c9..1c1acd29ee0e6 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -264,7 +264,14 @@ bool expectIgnoredInIRTranslation(const Instruction *I) { const auto *II = dyn_cast(I); if (!II) return false; - return II->getIntrinsicID() == Intrinsic::invariant_start; + switch (II->getIntrinsicID()) { + case Intrinsic::invariant_start: + case Intrinsic::spv_resource_handlefrombinding: + case Intrinsic::spv_resource_getpointer: + return true; + default: + return false; + } } bool allowEmitFakeUse(const Value *Arg) { @@ 
-737,7 +744,13 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper( {"__spirv_GenericCastToPtrExplicit_ToLocal", 0}, {"__spirv_GenericCastToPtrExplicit_ToPrivate", 0}}; // TODO: maybe improve performance by caching demangled names - if (Function *CalledF = CI->getCalledFunction()) { + + auto *II = dyn_cast(I); + if (II && II->getIntrinsicID() == Intrinsic::spv_resource_getpointer) { + auto *ImageType = cast(II->getOperand(0)->getType()); + assert(ImageType->getTargetExtName() == "spirv.Image"); + Ty = ImageType->getTypeParameter(0); + } else if (Function *CalledF = CI->getCalledFunction()) { std::string DemangledName = getOclOrSpirvBuiltinDemangledName(CalledF->getName()); if (DemangledName.length() > 0) diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index a06c62e68d106..874894ae98726 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -1114,9 +1114,12 @@ SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg, return nullptr; } -SPIRVType *SPIRVGlobalRegistry::getResultType(Register VReg) { - MachineInstr *Instr = getVRegDef(CurMF->getRegInfo(), VReg); - return getSPIRVTypeForVReg(Instr->getOperand(1).getReg()); +SPIRVType *SPIRVGlobalRegistry::getResultType(Register VReg, + MachineFunction *MF) { + if (!MF) + MF = CurMF; + MachineInstr *Instr = getVRegDef(MF->getRegInfo(), VReg); + return getSPIRVTypeForVReg(Instr->getOperand(1).getReg(), MF); } SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType( diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index 528baf5f8d9e2..0c94ec4df97f5 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -377,7 +377,7 @@ class SPIRVGlobalRegistry { const MachineFunction *MF = nullptr) const; // Return the result type of the instruction defining the register. 
- SPIRVType *getResultType(Register VReg); + SPIRVType *getResultType(Register VReg, MachineFunction *MF = nullptr); // Whether the given VReg has a SPIR-V type mapped to it yet. bool hasSPIRVTypeForVReg(Register VReg) const { diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 4c861f71b0889..f5409c27d6ea3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -301,8 +301,9 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectReadImageIntrinsic(Register &ResVReg, const SPIRVType *ResType, MachineInstr &I) const; - bool selectImageWriteIntrinsic(MachineInstr &I) const; + bool selectResourceGetPointer(Register &ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; // Utilities std::pair @@ -332,10 +333,15 @@ class SPIRVInstructionSelector : public InstructionSelector { SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const; bool extractSubvector(Register &ResVReg, const SPIRVType *ResType, Register &ReadReg, MachineInstr &InsertionPoint) const; + bool generateImageRead(Register &ResVReg, const SPIRVType *ResType, + Register ImageReg, Register IdxReg, DebugLoc Loc, + MachineInstr &Pos) const; bool BuildCOPY(Register DestReg, Register SrcReg, MachineInstr &I) const; bool loadVec3BuiltinInputID(SPIRV::BuiltIn::BuiltIn BuiltInValue, Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool loadHandleBeforePosition(Register &HandleReg, const SPIRVType *ResType, + GIntrinsic &HandleDef, MachineInstr &Pos) const; }; } // end anonymous namespace @@ -1043,6 +1049,25 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg, MachineInstr &I) const { unsigned OpOffset = isa(I) ? 
1 : 0; Register Ptr = I.getOperand(1 + OpOffset).getReg(); + + auto *PtrDef = getVRegDef(*MRI, Ptr); + auto *IntPtrDef = dyn_cast(PtrDef); + if (IntPtrDef && + IntPtrDef->getIntrinsicID() == Intrinsic::spv_resource_getpointer) { + Register ImageReg = IntPtrDef->getOperand(2).getReg(); + Register NewImageReg = + MRI->createVirtualRegister(MRI->getRegClass(ImageReg)); + auto *ImageDef = cast(getVRegDef(*MRI, ImageReg)); + if (!loadHandleBeforePosition(NewImageReg, GR.getSPIRVTypeForVReg(ImageReg), + *ImageDef, I)) { + return false; + } + + Register IdxReg = IntPtrDef->getOperand(3).getReg(); + return generateImageRead(ResVReg, ResType, NewImageReg, IdxReg, + I.getDebugLoc(), I); + } + auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad)) .addDef(ResVReg) .addUse(GR.getSPIRVTypeID(ResType)) @@ -1062,6 +1087,29 @@ bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const { unsigned OpOffset = isa(I) ? 1 : 0; Register StoreVal = I.getOperand(0 + OpOffset).getReg(); Register Ptr = I.getOperand(1 + OpOffset).getReg(); + + auto *PtrDef = getVRegDef(*MRI, Ptr); + auto *IntPtrDef = dyn_cast(PtrDef); + if (IntPtrDef && + IntPtrDef->getIntrinsicID() == Intrinsic::spv_resource_getpointer) { + Register ImageReg = IntPtrDef->getOperand(2).getReg(); + Register NewImageReg = + MRI->createVirtualRegister(MRI->getRegClass(ImageReg)); + auto *ImageDef = cast(getVRegDef(*MRI, ImageReg)); + if (!loadHandleBeforePosition(NewImageReg, GR.getSPIRVTypeForVReg(ImageReg), + *ImageDef, I)) { + return false; + } + + Register IdxReg = IntPtrDef->getOperand(3).getReg(); + return BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(SPIRV::OpImageWrite)) + .addUse(NewImageReg) + .addUse(IdxReg) + .addUse(StoreVal) + .constrainAllUses(TII, TRI, RBI); + } + MachineBasicBlock &BB = *I.getParent(); auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpStore)) .addUse(Ptr) @@ -3066,6 +3114,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, case 
Intrinsic::spv_resource_load_typedbuffer: { return selectReadImageIntrinsic(ResVReg, ResType, I); } + case Intrinsic::spv_resource_getpointer: { + return selectResourceGetPointer(ResVReg, ResType, I); + } case Intrinsic::spv_discard: { return selectDiscard(ResVReg, ResType, I); } @@ -3083,27 +3134,7 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, bool SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg, const SPIRVType *ResType, MachineInstr &I) const { - - uint32_t Set = foldImm(I.getOperand(2), MRI); - uint32_t Binding = foldImm(I.getOperand(3), MRI); - uint32_t ArraySize = foldImm(I.getOperand(4), MRI); - Register IndexReg = I.getOperand(5).getReg(); - bool IsNonUniform = ArraySize > 1 && foldImm(I.getOperand(6), MRI); - - MachineIRBuilder MIRBuilder(I); - Register VarReg = buildPointerToResource(ResType, Set, Binding, ArraySize, - IndexReg, IsNonUniform, MIRBuilder); - - if (IsNonUniform) - buildOpDecorate(ResVReg, I, TII, SPIRV::Decoration::NonUniformEXT, {}); - - // TODO: For now we assume the resource is an image, which needs to be - // loaded to get the handle. That will not be true for storage buffers. - return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad)) - .addDef(ResVReg) - .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(VarReg) - .constrainAllUses(TII, TRI, RBI); + return true; } bool SPIRVInstructionSelector::selectReadImageIntrinsic( @@ -3116,34 +3147,49 @@ bool SPIRVInstructionSelector::selectReadImageIntrinsic( // We will do that when we can, but for now trying to move forward with other // issues. 
Register ImageReg = I.getOperand(2).getReg(); - assert(MRI->getVRegDef(ImageReg)->getParent() == I.getParent() && - "The image must be loaded in the same basic block as its use."); + auto *ImageDef = cast(getVRegDef(*MRI, ImageReg)); + Register NewImageReg = MRI->createVirtualRegister(MRI->getRegClass(ImageReg)); + if (!loadHandleBeforePosition(NewImageReg, GR.getSPIRVTypeForVReg(ImageReg), + *ImageDef, I)) { + return false; + } + + Register IdxReg = I.getOperand(3).getReg(); + DebugLoc Loc = I.getDebugLoc(); + MachineInstr &Pos = I; + return generateImageRead(ResVReg, ResType, NewImageReg, IdxReg, Loc, Pos); +} + +bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg, + const SPIRVType *ResType, + Register ImageReg, + Register IdxReg, DebugLoc Loc, + MachineInstr &Pos) const { uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType); if (ResultSize == 4) { - return BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(SPIRV::OpImageRead)) + return BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead)) .addDef(ResVReg) .addUse(GR.getSPIRVTypeID(ResType)) .addUse(ImageReg) - .addUse(I.getOperand(3).getReg()) + .addUse(IdxReg) .constrainAllUses(TII, TRI, RBI); } - SPIRVType *ReadType = widenTypeToVec4(ResType, I); + SPIRVType *ReadType = widenTypeToVec4(ResType, Pos); Register ReadReg = MRI->createVirtualRegister(GR.getRegClass(ReadType)); bool Succeed = - BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpImageRead)) + BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead)) .addDef(ReadReg) .addUse(GR.getSPIRVTypeID(ReadType)) .addUse(ImageReg) - .addUse(I.getOperand(3).getReg()) + .addUse(IdxReg) .constrainAllUses(TII, TRI, RBI); if (!Succeed) return false; if (ResultSize == 1) { - return BuildMI(*I.getParent(), I, I.getDebugLoc(), + return BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpCompositeExtract)) .addDef(ResVReg) .addUse(GR.getSPIRVTypeID(ResType)) @@ -3151,7 +3197,25 @@ bool 
SPIRVInstructionSelector::selectReadImageIntrinsic( .addImm(0) .constrainAllUses(TII, TRI, RBI); } - return extractSubvector(ResVReg, ResType, ReadReg, I); + return extractSubvector(ResVReg, ResType, ReadReg, Pos); +} + +bool SPIRVInstructionSelector::selectResourceGetPointer( + Register &ResVReg, const SPIRVType *ResType, MachineInstr &I) const { +#ifdef ASSERT + // For now, the operand is an image. This will change once we start handling + // more resource types. + Register ResourcePtr = I.getOperand(2).getReg(); + SPIRVType *RegType = GR.getResultType(ResourcePtr); + assert(RegType->getOpcode() == SPIRV::OpTypeImage && + "Can only handle texel buffers for now."); +#endif + + // For texel buffers, the index into the image is part of the OpImageRead or + // OpImageWrite instructions. So we will do nothing in this case. This + // intrinsic will be combined with the load or store when selecting the load + // or store. + return true; } bool SPIRVInstructionSelector::extractSubvector( @@ -3203,15 +3267,20 @@ bool SPIRVInstructionSelector::selectImageWriteIntrinsic( // We will do that when we can, but for now trying to move forward with other // issues. 
Register ImageReg = I.getOperand(1).getReg(); - assert(MRI->getVRegDef(ImageReg)->getParent() == I.getParent() && - "The image must be loaded in the same basic block as its use."); + auto *ImageDef = cast(getVRegDef(*MRI, ImageReg)); + Register NewImageReg = MRI->createVirtualRegister(MRI->getRegClass(ImageReg)); + if (!loadHandleBeforePosition(NewImageReg, GR.getSPIRVTypeForVReg(ImageReg), + *ImageDef, I)) { + return false; + } + Register CoordinateReg = I.getOperand(2).getReg(); Register DataReg = I.getOperand(3).getReg(); assert(GR.getResultType(DataReg)->getOpcode() == SPIRV::OpTypeVector); assert(GR.getScalarOrVectorComponentCount(GR.getResultType(DataReg)) == 4); return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpImageWrite)) - .addUse(ImageReg) + .addUse(NewImageReg) .addUse(CoordinateReg) .addUse(DataReg) .constrainAllUses(TII, TRI, RBI); @@ -3878,6 +3947,36 @@ SPIRVType *SPIRVInstructionSelector::widenTypeToVec4(const SPIRVType *Type, return GR.getOrCreateSPIRVVectorType(ScalarType, 4, MIRBuilder); } +bool SPIRVInstructionSelector::loadHandleBeforePosition( + Register &HandleReg, const SPIRVType *ResType, GIntrinsic &HandleDef, + MachineInstr &Pos) const { + + assert(HandleDef.getIntrinsicID() == + Intrinsic::spv_resource_handlefrombinding); + uint32_t Set = foldImm(HandleDef.getOperand(2), MRI); + uint32_t Binding = foldImm(HandleDef.getOperand(3), MRI); + uint32_t ArraySize = foldImm(HandleDef.getOperand(4), MRI); + Register IndexReg = HandleDef.getOperand(5).getReg(); + bool IsNonUniform = ArraySize > 1 && foldImm(HandleDef.getOperand(6), MRI); + + MachineIRBuilder MIRBuilder(HandleDef); + Register VarReg = buildPointerToResource(ResType, Set, Binding, ArraySize, + IndexReg, IsNonUniform, MIRBuilder); + + if (IsNonUniform) + buildOpDecorate(HandleReg, HandleDef, TII, SPIRV::Decoration::NonUniformEXT, + {}); + + // TODO: For now we assume the resource is an image, which needs to be + // loaded to get the handle. 
That will not be true for storage buffers. + return BuildMI(*Pos.getParent(), Pos, HandleDef.getDebugLoc(), + TII.get(SPIRV::OpLoad)) + .addDef(HandleReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(VarReg) + .constrainAllUses(TII, TRI, RBI); +} + namespace llvm { InstructionSelector * createSPIRVInstructionSelector(const SPIRVTargetMachine &TM, diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 4fa2dca5a78c0..bc00d5032544f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1694,14 +1694,16 @@ void addInstrRequirements(const MachineInstr &MI, break; case SPIRV::OpImageRead: { Register ImageReg = MI.getOperand(2).getReg(); - SPIRVType *TypeDef = ST.getSPIRVGlobalRegistry()->getResultType(ImageReg); + SPIRVType *TypeDef = ST.getSPIRVGlobalRegistry()->getResultType( + ImageReg, const_cast(MI.getMF())); if (isImageTypeWithUnknownFormat(TypeDef)) Reqs.addCapability(SPIRV::Capability::StorageImageReadWithoutFormat); break; } case SPIRV::OpImageWrite: { Register ImageReg = MI.getOperand(0).getReg(); - SPIRVType *TypeDef = ST.getSPIRVGlobalRegistry()->getResultType(ImageReg); + SPIRVType *TypeDef = ST.getSPIRVGlobalRegistry()->getResultType( + ImageReg, const_cast(MI.getMF())); if (isImageTypeWithUnknownFormat(TypeDef)) Reqs.addCapability(SPIRV::Capability::StorageImageWriteWithoutFormat); break; diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/BufferLoadStore.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/BufferLoadStore.ll new file mode 100644 index 0000000000000..25dcc90cb61cd --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/BufferLoadStore.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: [[float:%[0-9]+]] = OpTypeFloat 32 +; CHECK-DAG: [[v4float:%[0-9]+]] 
= OpTypeVector [[float]] 4 +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[one:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[twenty:%[0-9]+]] = OpConstant [[int]] 20 +; CHECK-DAG: [[twenty_three:%[0-9]+]] = OpConstant [[int]] 23 +; CHECK-DAG: [[ImageType:%[0-9]+]] = OpTypeImage [[float]] Buffer 2 0 0 2 Rgba32f +; CHECK-DAG: [[ImagePtr:%[0-9]+]] = OpTypePointer UniformConstant [[ImageType]] +; CHECK: [[Var:%[0-9]+]] = OpVariable [[ImagePtr]] UniformConstant + +; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) +define void @main() local_unnamed_addr #0 { +entry: +; CHECK: [[H:%[0-9]+]] = OpLoad [[ImageType]] [[Var]] + %s_h.i = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 1) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_0t(i32 3, i32 5, i32 1, i32 0, i1 false) + +; CHECK: [[R:%[0-9]+]] = OpImageRead [[v4float]] [[H]] [[one]] +; CHECK: [[V:%[0-9]+]] = OpCompositeExtract [[float]] [[R]] 0 + %0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", float, 5, 2, 0, 0, 2, 1) %s_h.i, i32 1) + %1 = load float, ptr %0, align 4 +; CHECK: OpBranch [[bb_store:%[0-9]+]] + br label %bb_store + +; CHECK: [[bb_store]] = OpLabel +bb_store: + +; CHECK: [[H:%[0-9]+]] = OpLoad [[ImageType]] [[Var]] +; CHECK: OpImageWrite [[H]] [[zero]] [[V]] + %2 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", float, 5, 2, 0, 0, 2, 1) %s_h.i, i32 0) + store float %1, ptr %2, align 4 +; CHECK: OpBranch [[bb_both:%[0-9]+]] + br label %bb_both + +; CHECK: [[bb_both]] = OpLabel +bb_both: +; CHECK: [[H:%[0-9]+]] = OpLoad [[ImageType]] [[Var]] +; CHECK: [[R:%[0-9]+]] = OpImageRead [[v4float]] [[H]] [[twenty_three]] +; CHECK: [[V:%[0-9]+]] = 
OpCompositeExtract [[float]] [[R]] 0 + %3 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", float, 5, 2, 0, 0, 2, 1) %s_h.i, i32 23) + %4 = load float, ptr %3, align 4 + +; CHECK: [[H:%[0-9]+]] = OpLoad [[ImageType]] [[Var]] +; CHECK: OpImageWrite [[H]] [[twenty]] [[V]] + %5 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", float, 5, 2, 0, 0, 2, 1) %s_h.i, i32 20) + store float %4, ptr %5, align 4 + ret void +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", float, 5, 2, 0, 0, 2, 1), i32) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) +declare target("spirv.Image", float, 5, 2, 0, 0, 2, 1) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_0t(i32, i32, i32, i32, i1) #1 + +attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/BufferStore.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/BufferStore.ll index b0ffa01ccdd44..812e20e45565b 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/BufferStore.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/BufferStore.ll @@ -20,13 +20,14 @@ declare <4 x i32> @get_data() #1 ; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} ; CHECK-NEXT: OpLabel define void @RWBufferStore_Vec4_I32() #0 { -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] %buffer0 = call 
target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) @llvm.spv.resource.handlefrombinding.tspirv.Image_i32_5_2_0_0_2_24( i32 16, i32 7, i32 1, i32 0, i1 false) ; CHECK: [[data:%[0-9]+]] = OpFunctionCall %data = call <4 x i32> @get_data() + +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] ; CHECK: OpImageWrite [[buffer]] [[zero]] [[data]] call void @llvm.spv.resource.store.typedbuffer(target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) %buffer0, i32 0, <4 x i32> %data) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll deleted file mode 100644 index 97a7252eb067b..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll +++ /dev/null @@ -1,40 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader -; CHECK-NEXT: OpCapability SampledImageArrayDynamicIndexing -; CHECK-NEXT: OpCapability Sampled1D -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} -; CHECK-DAG: [[CombindedType:%[0-9]+]] = OpTypeSampledImage [[BufferType]] -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[CombindedType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[CombindedType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = 
OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[CombindedType]] [[ac]] - %buffer0 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 0, i1 false) - -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[CombindedType]] [[ac]] - %buffer1 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 1, i1 false) - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll deleted file mode 100644 index 6c5c126e4462b..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader -; CHECK: OpCapability ShaderNonUniform -; CHECK-NEXT: OpCapability SampledImageArrayNonUniformIndexing -; CHECK-NEXT: OpCapability Sampled1D -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 -; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform -; CHECK: OpDecorate 
[[ac1:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} -; CHECK-DAG: [[CombindedType:%[0-9]+]] = OpTypeSampledImage [[BufferType]] -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[CombindedType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[CombindedType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[ld0:%[0-9]+]] = OpLoad [[CombindedType]] [[ac0]] - %buffer0 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 0, i1 true) - -; CHECK: [[ac1]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[ld1]] = OpLoad [[CombindedType]] [[ac1]] - %buffer1 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 1, i1 true) - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll deleted file mode 100644 index 2a52dd1817f0c..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll +++ 
/dev/null @@ -1,39 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader -; CHECK-NEXT: OpCapability InputAttachmentArrayDynamicIndexing -; SCHECK-NEXT: OpCapability InputAttachment -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] SubpassData 2 0 0 2 Unknown {{$}} -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] - %buffer0 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_6_2_0_0_2_0( - i32 3, i32 4, i32 3, i32 0, i1 false) - -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] - %buffer1 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_6_2_0_0_2_0( - i32 3, i32 4, i32 3, i32 1, i1 false) - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" 
"hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll deleted file mode 100644 index 6dae79c5b385d..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader -; CHECK-NEXT: OpCapability ShaderNonUniformEXT -; CHECK-NEXT: OpCapability InputAttachmentArrayNonUniformIndexing -; SCHECK-NEXT: OpCapability InputAttachment -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 -; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] SubpassData 2 0 0 2 Unknown {{$}} -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac0]] = 
OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] - %buffer0 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_6_2_0_0_2_0( - i32 3, i32 4, i32 3, i32 0, i1 true) - -; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] - %buffer1 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_6_2_0_0_2_0( - i32 3, i32 4, i32 3, i32 1, i1 true) - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll deleted file mode 100644 index efd89c5977f97..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll +++ /dev/null @@ -1,65 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader -; CHECK-NEXT: OpCapability SampledImageArrayDynamicIndexing -; CHECK-NEXT: OpCapability Sampled1D -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 -; CHECK-DAG: OpDecorate [[OtherVar:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[OtherVar]] Binding 4 - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero:%[0-9]+]] = 
OpConstant [[int]] 0 -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK-DAG: [[OtherArraySize:%[0-9]+]] = OpConstant [[int]] 5 -; CHECK-DAG: [[OtherBufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[OtherArraySize]] -; CHECK-DAG: [[OtherArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[OtherBufferArrayType]] -; CHECK-DAG: [[OtherVar]] = OpVariable [[OtherArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] - %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 0, i1 false) - -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] - %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 1, i1 false) - ret void -} - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @DifferentArraySizesAreDifferentVariables() #0 { -; Make sure we use different variables when the array sizes are different -; same in case one function calls the other. 
-; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] - %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 0, i1 false) - -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[OtherVar]] [[One]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] - %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_1_24( - i32 3, i32 4, i32 5, i32 1, i1 false) - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll deleted file mode 100644 index 6d93051ce3f0a..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader -; CHECK-NEXT: OpCapability ShaderNonUniformEXT -; CHECK-NEXT: OpCapability SampledImageArrayNonUniformIndexing -; CHECK-NEXT: OpCapability Sampled1D -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 -; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; 
CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] - %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 0, i1 true) - -; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] - %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 1, i1 true) - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll deleted file mode 100644 index fd276e9ef4a98..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader 
-; CHECK-NEXT: OpCapability SampledImageArrayDynamicIndexing -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[SamplerType:%[0-9]+]] = OpTypeSampler -; CHECK-DAG: [[SamplerPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 -; CHECK-DAG: [[SamplerArrayType:%[0-9]+]] = OpTypeArray [[SamplerType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[Zero]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[SamplerType]] [[ac]] - %buffer0 = call target("spirv.Sampler") - @llvm.spv.resource.handlefrombinding.tspirv.Image( - i32 3, i32 4, i32 3, i32 0, i1 false) - -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[One]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[SamplerType]] [[ac]] - %buffer1 = call target("spirv.Sampler") - @llvm.spv.resource.handlefrombinding.tspirv.Image( - i32 3, i32 4, i32 3, i32 1, i1 false) - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll deleted file mode 100644 index 3e59d66febf0b..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: llc -O0 
-verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader -; CHECK-NEXT: ShaderNonUniform -; CHECK-NEXT: OpCapability SampledImageArrayNonUniformIndexing -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 -; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[SamplerType:%[0-9]+]] = OpTypeSampler -; CHECK-DAG: [[SamplerPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 -; CHECK-DAG: [[SamplerArrayType:%[0-9]+]] = OpTypeArray [[SamplerType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac0]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[Zero]] -; CHECK: [[ld0]] = OpLoad [[SamplerType]] [[ac0]] - %buffer0 = call target("spirv.Sampler") - @llvm.spv.resource.handlefrombinding.tspirv.Image( - i32 3, i32 4, i32 3, i32 0, i1 true) - -; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[One]] -; CHECK: [[ld1]] = OpLoad [[SamplerType]] [[ac1]] - %buffer1 = call target("spirv.Sampler") - @llvm.spv.resource.handlefrombinding.tspirv.Image( - i32 3, i32 4, i32 3, i32 1, i1 true) - ret void -} - -attributes #0 = { 
convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/ScalarResourceType.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/ScalarResourceType.ll index 52cc2275bc7a6..f52fd44bf3801 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/ScalarResourceType.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/ScalarResourceType.ll @@ -22,12 +22,16 @@ define void @RWBufferLoad() #0 { %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_24( i32 16, i32 7, i32 1, i32 0, i1 false) + %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) %buffer0, i32 0) + store i32 0, ptr %ptr0, align 4 ; Make sure we use the same variable with multiple loads. ; CHECK: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_24( i32 16, i32 7, i32 1, i32 0, i1 false) + %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) %buffer1, i32 0) + store i32 0, ptr %ptr1, align 4 ret void } @@ -40,6 +44,8 @@ define void @UseDifferentGlobalVar() #0 { %buffer0 = call target("spirv.Image", float, 5, 2, 0, 0, 2, 3) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_3( i32 16, i32 7, i32 1, i32 0, i1 false) + %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", float, 5, 2, 0, 0, 2, 3) %buffer0, i32 0) + store float 0.0, ptr %ptr0, align 4 ret void } @@ -52,6 +58,8 @@ define void 
@ReuseGlobalVarFromFirstFunction() #0 { %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_24( i32 16, i32 7, i32 1, i32 0, i1 false) + %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) %buffer1, i32 0) + store i32 0, ptr %ptr1, align 4 ret void } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll index 082a5c832f1c4..94ebe74148b95 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll @@ -27,12 +27,16 @@ define void @main() #0 { %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_2_24( i32 3, i32 4, i32 3, i32 0, i1 false) + %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) %buffer0, i32 0) + store i32 0, ptr %ptr0, align 4 ; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] ; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_2_24( i32 3, i32 4, i32 3, i32 1, i1 false) + %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) %buffer1, i32 0) + store i32 0, ptr %ptr1, align 4 ret void } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll index d6419492bb952..f9466e431c19c 100644 --- 
a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll @@ -34,12 +34,16 @@ define void @main() #0 { %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_2_24( i32 3, i32 4, i32 3, i32 0, i1 true) + %ptr0 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) %buffer0, i32 0) + store i32 0, ptr %ptr0, align 4 ; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] ; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_0_2_0_0_2_24( i32 3, i32 4, i32 3, i32 1, i1 true) + %ptr1 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.spv.resource.getpointer.p0.tspirv.Image_f32_5_2_0_0_2_0t(target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) %buffer1, i32 0) + store i32 0, ptr %ptr1, align 4 ret void } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll deleted file mode 100644 index 31fdcb362eb73..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader -; SCHECK-NEXT: OpCapability ImageBuffer -; CHECK-NEXT: OpCapability StorageTexelBufferArrayDynamicIndexing -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage 
[[int]] Buffer 2 0 0 2 R32i {{$}} -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @void() #0 { -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] - %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_24( - i32 3, i32 4, i32 3, i32 0, i1 false) - -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] - %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_24( - i32 3, i32 4, i32 3, i32 1, i1 false) - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll deleted file mode 100644 index a5608979025fe..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | 
spirv-val %} - -; CHECK: OpCapability Shader -; SCHECK-NEXT: OpCapability ImageBuffer -; CHECK-NEXT: OpCapability ShaderNonUniformEXT -; CHECK-NEXT: OpCapability StorageTexelBufferArrayNonUniformIndexingEXT -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 -; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 2 R32i {{$}} -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] - %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_24( - i32 3, i32 4, i32 3, i32 0, i1 true) - -; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] - %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_24( - i32 3, i32 4, i32 3, i32 1, i1 true) - ret void -} - -attributes #0 = { convergent 
noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll deleted file mode 100644 index 131a6b38d393e..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader -; SCHECK-NEXT: OpCapability SampledBuffer -; CHECK-NEXT: OpCapability UniformTexelBufferArrayDynamicIndexing -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 1 R32i {{$}} -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] - %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 0, i1 false) - 
-; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] - %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 1, i1 false) - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll deleted file mode 100644 index cfb3eb5f52076..0000000000000 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} - -; CHECK: OpCapability Shader -; SCHECK-NEXT: OpCapability SampledBuffer -; CHECK-NEXT: OpCapability ShaderNonUniformEXT -; CHECK-NEXT: OpCapability UniformTexelBufferArrayNonUniformIndexing -; CHECK-NOT: OpCapability - -; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 -; CHECK-DAG: OpDecorate [[Var]] Binding 4 -; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform -; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform - -; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 -; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 1 R32i {{$}} -; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] -; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 -; CHECK-DAG: [[One]] = OpConstant [[int]] 1 -; CHECK-DAG: [[Zero]] 
= OpConstant [[int]] 0 -; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] -; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] -; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant - -; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} -; CHECK-NEXT: OpLabel -define void @main() #0 { -; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] -; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] - %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 0, i1 true) - -; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] -; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] - %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) - @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_1_24( - i32 3, i32 4, i32 3, i32 1, i1 true) - ret void -} - -attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/UnknownBufferStore.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/UnknownBufferStore.ll index 4c6f9bfd97ed7..a4123c36a4488 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/UnknownBufferStore.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/UnknownBufferStore.ll @@ -18,13 +18,14 @@ declare <4 x i32> @get_data() #1 ; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} ; CHECK-NEXT: OpLabel define void @RWBufferLoad_Vec4_I32() #0 { -; CHECK: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_0( i32 16, i32 7, i32 1, i32 0, i1 false) ; CHECK: [[data:%[0-9]+]] = OpFunctionCall %data = call <4 x i32> @get_data() 
+ +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] ; CHECK: OpImageWrite [[buffer]] [[ten]] [[data]] call void @llvm.spv.resource.store.typedbuffer( target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) %buffer0, i32 10, <4 x i32> %data) From ee1c85225235c4353555a17b73ba16a2f177265b Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Fri, 17 Jan 2025 09:27:36 -0800 Subject: [PATCH 39/88] [DebugInfo][InstrRef] Treat ORRWrr as a copy instr (#123102) The instruction selector uses the `MachineFunction::salvageCopySSA` function to insert `DBG_PHIs` or identify a defining instruction for a copy-like instruction when finalizing Instruction References. AArch64 has the ORR instruction which is a logical OR with the variants ORRWrr which refers to a register to register variant, and ORRWrs which is a register to a shifted register variant. An ORRWrs where the shift amount is 0, and the zero register ($wzr) is used is considered a copy, for example: `$w0 = ORRWrs $wzr, killed $w3, 0` However an ORRWrr with a zero register is not considered a copy `$w0 = ORRWrr $wzr, killed $w3` This causes an issue in the livedebugvalues pass because in aarch64-isel the instruction is the ORRWrr variant, but is then changed to the ORRWrs variant before the livedebugvalues pass. This causes a mismatch between the two passes which leads to a crash in the livedebugvalues pass. This patch fixes the issue.
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 16 +++-- llvm/test/CodeGen/AArch64/instr-ref-ldv.ll | 70 ++++++++++++++++++++ 2 files changed, 80 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/instr-ref-ldv.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index fd24e49f948a2..a2fd4963db108 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -9742,9 +9742,11 @@ AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg // and zero immediate operands used as an alias for mov instruction. - if (MI.getOpcode() == AArch64::ORRWrs && - MI.getOperand(1).getReg() == AArch64::WZR && - MI.getOperand(3).getImm() == 0x0 && + if (((MI.getOpcode() == AArch64::ORRWrs && + MI.getOperand(1).getReg() == AArch64::WZR && + MI.getOperand(3).getImm() == 0x0) || + (MI.getOpcode() == AArch64::ORRWrr && + MI.getOperand(1).getReg() == AArch64::WZR)) && // Check that the w->w move is not a zero-extending w->x mov. 
(!MI.getOperand(0).getReg().isVirtual() || MI.getOperand(0).getSubReg() == 0) && @@ -9764,9 +9766,11 @@ AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { std::optional AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const { - if (MI.getOpcode() == AArch64::ORRWrs && - MI.getOperand(1).getReg() == AArch64::WZR && - MI.getOperand(3).getImm() == 0x0) + if ((MI.getOpcode() == AArch64::ORRWrs && + MI.getOperand(1).getReg() == AArch64::WZR && + MI.getOperand(3).getImm() == 0x0) || + (MI.getOpcode() == AArch64::ORRWrr && + MI.getOperand(1).getReg() == AArch64::WZR)) return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; return std::nullopt; } diff --git a/llvm/test/CodeGen/AArch64/instr-ref-ldv.ll b/llvm/test/CodeGen/AArch64/instr-ref-ldv.ll new file mode 100644 index 0000000000000..fa00c75e2928b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/instr-ref-ldv.ll @@ -0,0 +1,70 @@ +; RUN: llc -O2 -experimental-debug-variable-locations %s -stop-after=livedebugvalues -mtriple=arm64-apple-macosx15.0.0 -o - | FileCheck %s + +; CHECK: $w{{[0-9]+}} = ORRWrs $wzr, killed $w{{[0-9]+}}, 0 +; CHECK-NEXT: DBG_INSTR_REF !{{[0-9]+}}, !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref({{[0-9]+}}, 0), debug-location !{{[0-9]+}} + +; This test makes sure that instruction referenced livedebugvalues pass doesn't crash when an ORRWrr is present before +; aarch64-isel and is converted to an ORRWrs with a shift amount immediate value of 0 before livedebugvalues, in this +; test case the MIR before both passes is shown below: + +; Before aarch64-isel +; %11:gpr32 = ORRWrr $wzr, killed %10:gpr32, debug-location !5; :0 +; %0:gpr64all = SUBREG_TO_REG 0, killed %11:gpr32, %subreg.sub_32, debug-location !5; :0 +; DBG_INSTR_REF !7, !DIExpression(DW_OP_LLVM_arg, 0), %0:gpr64all, debug-location !11; :0 @[ :0 ] line no:0 + +; Before livedebugvalues +; $w0 = ORRWrs $wzr, killed $w3, 0 +; DBG_INSTR_REF !7, !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref(3, 0), debug-location !11; 
:0 @[ :0 ] line no:0 + +; The livedebugvalues pass will consider the ORRWrs variant as a copy, therefore the aarch64-isel call to +; salvageCopySSA should do the same. + +%"class.llvm::iterator_range.53" = type { %"class.llvm::opt::arg_iterator.54", %"class.llvm::opt::arg_iterator.54" } +%"class.llvm::opt::arg_iterator.54" = type { %"class.std::__1::reverse_iterator", %"class.std::__1::reverse_iterator", [2 x %"class.llvm::opt::OptSpecifier"] } +%"class.std::__1::reverse_iterator" = type { ptr, ptr } +%"class.llvm::opt::OptSpecifier" = type { i32 } +declare noundef zeroext i1 @_ZNK4llvm3opt6Option7matchesENS0_12OptSpecifierE(ptr noundef nonnull align 8 dereferenceable(16), i64) local_unnamed_addr #1 +define noundef zeroext i1 @_ZNK4llvm3opt7ArgList14hasFlagNoClaimENS0_12OptSpecifierES2_b(ptr noundef nonnull align 8 dereferenceable(184) %this, i64 %Pos.coerce, i64 %Neg.coerce, i1 noundef zeroext %Default) local_unnamed_addr #2 !dbg !9383 { +entry: + %ref.tmp.i = alloca %"class.llvm::iterator_range.53", align 8 + %coerce.val.ii6 = and i64 %Pos.coerce, 4294967295, !dbg !9393 + #dbg_value(i64 %coerce.val.ii6, !9452, !DIExpression(), !9480) + %__begin0.sroa.4.0.ref.tmp.sroa_idx.i = getelementptr inbounds i8, ptr %ref.tmp.i, i64 8, !dbg !9480 + %__begin0.sroa.4.0.copyload.i = load ptr, ptr %__begin0.sroa.4.0.ref.tmp.sroa_idx.i, align 8, !dbg !9480 + %__end0.sroa.4.0.end_iterator.i.sroa_idx.i = getelementptr inbounds i8, ptr %ref.tmp.i, i64 48, !dbg !9480 + %__end0.sroa.4.0.copyload.i = load ptr, ptr %__end0.sroa.4.0.end_iterator.i.sroa_idx.i, align 8, !dbg !9480 + %cmp.i.i.i.not.i = icmp eq ptr %__begin0.sroa.4.0.copyload.i, %__end0.sroa.4.0.copyload.i, !dbg !9480 + br i1 %cmp.i.i.i.not.i, label %_ZNK4llvm3opt7ArgList17getLastArgNoClaimIJNS0_12OptSpecifierES3_EEEPNS0_3ArgEDpT_.exit.thread, label %_ZNK4llvm3opt7ArgList17getLastArgNoClaimIJNS0_12OptSpecifierES3_EEEPNS0_3ArgEDpT_.exit, !dbg !9480 
+_ZNK4llvm3opt7ArgList17getLastArgNoClaimIJNS0_12OptSpecifierES3_EEEPNS0_3ArgEDpT_.exit.thread: ; preds = %entry + br label %1, !dbg !9480 +_ZNK4llvm3opt7ArgList17getLastArgNoClaimIJNS0_12OptSpecifierES3_EEEPNS0_3ArgEDpT_.exit: ; preds = %entry + %incdec.ptr.i.i.i = getelementptr inbounds i8, ptr %__begin0.sroa.4.0.copyload.i, i64 -8, !dbg !9480 + %0 = load ptr, ptr %incdec.ptr.i.i.i, align 8, !dbg !9527, !tbaa !9528 + %tobool.not.not = icmp eq ptr %0, null, !dbg !9480 + br i1 %tobool.not.not, label %1, label %cleanup, !dbg !9480 +cleanup: ; preds = %_ZNK4llvm3opt7ArgList17getLastArgNoClaimIJNS0_12OptSpecifierES3_EEEPNS0_3ArgEDpT_.exit + %call13 = call noundef zeroext i1 @_ZNK4llvm3opt6Option7matchesENS0_12OptSpecifierE(ptr noundef nonnull align 8 dereferenceable(16) %0, i64 %coerce.val.ii6) #3, !dbg !9480 + br label %1 + %2 = phi i1 [ %call13, %cleanup ], [ %Default, %_ZNK4llvm3opt7ArgList17getLastArgNoClaimIJNS0_12OptSpecifierES3_EEEPNS0_3ArgEDpT_.exit ], [ %Default, %_ZNK4llvm3opt7ArgList17getLastArgNoClaimIJNS0_12OptSpecifierES3_EEEPNS0_3ArgEDpT_.exit.thread ] + ret i1 %2, !dbg !9480 +} +!llvm.module.flags = !{!2, !6} +!llvm.dbg.cu = !{!7} +!2 = !{i32 2, !"Debug Info Version", i32 3} +!6 = !{i32 7, !"frame-pointer", i32 1} +!7 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !8, emissionKind: FullDebug, sdk: "MacOSX15.3.sdk") +!8 = !DIFile(filename: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project/llvm/lib/Option/ArgList.cpp", directory: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project/build-instr-ref-stage2", checksumkind: CSK_MD5, checksum: "a3198e8ace679c7b1581a26b5583c658") +!3116 = distinct !DICompositeType(tag: DW_TAG_class_type, size: 32) +!9383 = distinct !DISubprogram(unit: !7, flags: DIFlagArtificial | DIFlagObjectPointer) +!9391 = distinct !DILexicalBlock(scope: !9383, line: 80, column: 12) +!9393 = !DILocation(scope: !9391) +!9440 = distinct !DILexicalBlock(scope: !9441, line: 269, column: 
5) +!9441 = distinct !DILexicalBlock(scope: !9442, line: 269, column: 5) +!9442 = distinct !DISubprogram(unit: !7, retainedNodes: !9450) +!9450 = !{} +!9452 = !DILocalVariable(scope: !9442, type: !3116) +!9478 = distinct !DILocation(scope: !9391) +!9480 = !DILocation(scope: !9441, inlinedAt: !9478) +!9527 = !DILocation(scope: !9440, inlinedAt: !9478) +!9528 = !{!"any pointer", !9530, i64 0} +!9530 = !{} From 3065cf238ca3fddd74cba0bd660afe5fd4e7da03 Mon Sep 17 00:00:00 2001 From: Jorge Gorbe Moya Date: Fri, 17 Jan 2025 09:34:23 -0800 Subject: [PATCH 40/88] [bazel] Remove //clang:basic_internal_headers target (NFC) (#123230) This target exists to allow `#include "Header.h"` for headers in lib/Basic rather than using file-relative inclusion. This is rather hacky and results in having two targets that claim the same headers. Instead, we can pass a `-I` flag in the `copts` for //clang:basic, to adjust the include path to keep those `#include "Header.h"` directives working. There are other targets in this file already doing a similar thing for generated files. --- .../bazel/llvm-project-overlay/clang/BUILD.bazel | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 2286d4cd35e08..e3f4fab2c3fdb 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -666,19 +666,6 @@ py_binary( main = "utils/bundle_resources.py", ) -# A hacky library to expose some internal headers of the `basic` library to its -# own implementation source files using a stripped include prefix rather than -# file-relative-inclusion. This is inherently non-modular as these headers will -# be repeated in the sources below for file-relative-inclusion. 
-cc_library( - name = "basic_internal_headers", - hdrs = glob([ - "lib/Basic/*.h", - ]), - features = ["-header_modules"], - strip_include_prefix = "lib/Basic", -) - cc_library( name = "basic", srcs = [ @@ -696,6 +683,7 @@ cc_library( copts = [ "-DHAVE_VCS_VERSION_INC", "$(STACK_FRAME_UNLIMITED)", + "-I$(WORKSPACE_ROOT)/clang/lib/Basic", ], includes = ["include"], textual_hdrs = [ @@ -731,6 +719,7 @@ cc_library( ] + glob([ "include/clang/Basic/*.def", ]), + toolchains = [":workspace_root"], deps = [ ":basic_arm_cde_gen", ":basic_arm_fp16_inc_gen", @@ -750,7 +739,6 @@ cc_library( ":basic_builtins_spirv_gen", ":basic_builtins_x86_64_gen", ":basic_builtins_x86_gen", - ":basic_internal_headers", ":basic_riscv_sifive_vector_builtins_gen", ":basic_riscv_vector_builtin_cg_gen", ":basic_riscv_vector_builtins_gen", From ebfdd38228d4e21597642301fb75f5b02ff3ee06 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 17 Jan 2025 09:47:37 -0800 Subject: [PATCH 41/88] [SLP][NFC]Replace undef with constant zero in tests, NFC --- .../vectorize-free-extracts-inserts.ll | 56 ++++++++------- .../test/Transforms/SLPVectorizer/X86/hadd.ll | 67 ++++++++++++------ .../Transforms/SLPVectorizer/X86/sitofp.ll | 22 +++--- .../Transforms/SLPVectorizer/X86/value-bug.ll | 58 ++++++++-------- .../X86/vectorize-widest-phis.ll | 41 +++++------ ...l => insert-element-build-vector-const.ll} | 5 +- .../insert-element-build-vector.ll | 68 ++++++++++--------- 7 files changed, 179 insertions(+), 138 deletions(-) rename llvm/test/Transforms/SLPVectorizer/{insert-element-build-vector-const-undef.ll => insert-element-build-vector-const.ll} (81%) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll index 4755c690c0711..4b6f0438b8915 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -34,7 +34,7 @@ bb: %a.lane.0 = fmul double %v1.lane.0, %v2.lane.2 %a.lane.1 = fmul double %v1.lane.1, %v2.lane.3 - %a.ins.0 = insertelement <2 x double> undef, double %a.lane.0, i32 0 + %a.ins.0 = insertelement <2 x double> zeroinitializer, double %a.lane.0, i32 0 %a.ins.1 = insertelement <2 x double> %a.ins.0, double %a.lane.1, i32 1 call void @use(double %v1.lane.0) @@ -73,7 +73,7 @@ bb: %a.lane.0 = fmul double %v1.lane.0, %v2.lane.2 %a.lane.1 = fmul double %v3.lane.1, %v2.lane.2 - %a.ins.0 = insertelement <2 x double> undef, double %a.lane.0, i32 0 + %a.ins.0 = insertelement <2 x double> zeroinitializer, double %a.lane.0, i32 0 %a.ins.1 = insertelement <2 x double> %a.ins.0, double %a.lane.1, i32 1 call void @use(double %v1.lane.0) @@ -95,7 +95,8 @@ define void @noop_extract_second_2_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_1]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> undef, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> zeroinitializer, <4 x double> [[TMP4]], <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) ; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[PTR_1]], align 8 @@ -112,7 +113,7 @@ bb: %a.lane.0 = fmul double %v1.lane.2, %v2.lane.2 %a.lane.1 = fmul double %v1.lane.3, %v2.lane.2 - %a.ins.0 = insertelement <4 x double> undef, double %a.lane.0, i32 0 + %a.ins.0 = insertelement <4 x double> zeroinitializer, double %a.lane.0, i32 0 %a.ins.1 = insertelement <4 x double> %a.ins.0, double %a.lane.1, i32 1 call void 
@use(double %v1.lane.2) @@ -149,7 +150,7 @@ bb: %a.lane.0 = fmul double %v1.lane.1, %v2.lane.2 %a.lane.1 = fmul double %v1.lane.0, %v2.lane.2 - %a.ins.0 = insertelement <2 x double> undef, double %a.lane.0, i32 0 + %a.ins.0 = insertelement <2 x double> zeroinitializer, double %a.lane.0, i32 0 %a.ins.1 = insertelement <2 x double> %a.ins.0, double %a.lane.1, i32 1 call void @use(double %v1.lane.0) @@ -170,7 +171,8 @@ define void @extract_lanes_1_and_2(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_1]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> undef, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> zeroinitializer, <4 x double> [[TMP4]], <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[PTR_1]], align 8 @@ -187,7 +189,7 @@ bb: %a.lane.0 = fmul double %v1.lane.1, %v2.lane.2 %a.lane.1 = fmul double %v1.lane.2, %v2.lane.2 - %a.ins.0 = insertelement <4 x double> undef, double %a.lane.0, i32 0 + %a.ins.0 = insertelement <4 x double> zeroinitializer, double %a.lane.0, i32 0 %a.ins.1 = insertelement <4 x double> %a.ins.0, double %a.lane.1, i32 1 call void @use(double %v1.lane.1) @@ -213,7 +215,8 @@ define void @noop_extracts_existing_vector_4_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> 
[[TMP2]], <4 x double> undef, <9 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP4]], <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) @@ -235,7 +238,7 @@ bb: %a.lane.1 = fmul double %v1.lane.3, %v2.lane.2 %a.lane.2 = fmul double %v1.lane.0, %v2.lane.2 %a.lane.3 = fmul double %v1.lane.1, %v2.lane.0 - %a.ins.0 = insertelement <9 x double> undef, double %a.lane.0, i32 0 + %a.ins.0 = insertelement <9 x double> zeroinitializer, double %a.lane.0, i32 0 %a.ins.1 = insertelement <9 x double> %a.ins.0, double %a.lane.1, i32 1 %a.ins.2 = insertelement <9 x double> %a.ins.1, double %a.lane.2, i32 2 %a.ins.3 = insertelement <9 x double> %a.ins.2, double %a.lane.3, i32 3 @@ -261,7 +264,8 @@ define void @extracts_jumbled_4_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <9 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP4]], <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) @@ -283,7 +287,7 @@ bb: %a.lane.1 = fmul double %v1.lane.2, %v2.lane.1 %a.lane.2 = fmul double %v1.lane.1, %v2.lane.2 %a.lane.3 = fmul double %v1.lane.3, %v2.lane.0 - %a.ins.0 = insertelement <9 x double> undef, double %a.lane.0, i32 0 + %a.ins.0 = insertelement <9 x 
double> zeroinitializer, double %a.lane.0, i32 0 %a.ins.1 = insertelement <9 x double> %a.ins.0, double %a.lane.1, i32 1 %a.ins.2 = insertelement <9 x double> %a.ins.1, double %a.lane.2, i32 2 %a.ins.3 = insertelement <9 x double> %a.ins.2, double %a.lane.3, i32 3 @@ -313,12 +317,14 @@ define void @noop_extracts_9_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[A_INS_72:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP3]], <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP7]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP7]], <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[B_INS_71]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -350,7 +356,7 @@ bb: %a.lane.7 = fmul double %v1.lane.1, %v2.lane.1 %a.lane.8 = fmul double %v1.lane.2, %v2.lane.0 - %a.ins.0 = insertelement <9 x double> undef, double %a.lane.0, i32 0 + %a.ins.0 = insertelement <9 x double> zeroinitializer, double %a.lane.0, i32 0 %a.ins.1 = insertelement <9 x 
double> %a.ins.0, double %a.lane.1, i32 1 %a.ins.2 = insertelement <9 x double> %a.ins.1, double %a.lane.2, i32 2 %a.ins.3 = insertelement <9 x double> %a.ins.2, double %a.lane.3, i32 3 @@ -370,7 +376,7 @@ bb: %b.lane.7 = fmul double %v1.lane.4, %v2.lane.1 %b.lane.8 = fmul double %v1.lane.5, %v2.lane.0 - %b.ins.0 = insertelement <9 x double> undef, double %b.lane.0, i32 0 + %b.ins.0 = insertelement <9 x double> zeroinitializer, double %b.lane.0, i32 0 %b.ins.1 = insertelement <9 x double> %b.ins.0, double %b.lane.1, i32 1 %b.ins.2 = insertelement <9 x double> %b.ins.1, double %b.lane.2, i32 2 %b.ins.3 = insertelement <9 x double> %b.ins.2, double %b.lane.3, i32 3 @@ -401,12 +407,14 @@ define void @first_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]] ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[A_INS_72:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP3]], <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fmul <8 x double> [[TMP4]], [[TMP1]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP6]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP6]], <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[B_INS_71]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x 
double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -438,7 +446,7 @@ bb: %a.lane.7 = fmul double %v1.lane.0, %v2.lane.2 %a.lane.8 = fmul double %v1.lane.2, %v2.lane.1 - %a.ins.0 = insertelement <9 x double> undef, double %a.lane.0, i32 0 + %a.ins.0 = insertelement <9 x double> zeroinitializer, double %a.lane.0, i32 0 %a.ins.1 = insertelement <9 x double> %a.ins.0, double %a.lane.1, i32 1 %a.ins.2 = insertelement <9 x double> %a.ins.1, double %a.lane.2, i32 2 %a.ins.3 = insertelement <9 x double> %a.ins.2, double %a.lane.3, i32 3 @@ -458,7 +466,7 @@ bb: %b.lane.7 = fmul double %v1.lane.4, %v2.lane.2 %b.lane.8 = fmul double %v1.lane.5, %v2.lane.0 - %b.ins.0 = insertelement <9 x double> undef, double %b.lane.0, i32 0 + %b.ins.0 = insertelement <9 x double> zeroinitializer, double %b.lane.0, i32 0 %b.ins.1 = insertelement <9 x double> %b.ins.0, double %b.lane.1, i32 1 %b.ins.2 = insertelement <9 x double> %b.ins.1, double %b.lane.2, i32 2 %b.ins.3 = insertelement <9 x double> %b.ins.2, double %b.lane.3, i32 3 @@ -490,12 +498,14 @@ define void @first_and_second_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 +; CHECK-NEXT: [[A_INS_72:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP3]], <9 x i32> +; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]] ; CHECK-NEXT: [[TMP7:%.*]] = 
shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> -; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP7]], double [[B_LANE_8]], i32 8 +; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> zeroinitializer, <9 x double> [[TMP7]], <9 x i32> +; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[B_INS_71]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 ; CHECK-NEXT: ret void @@ -527,7 +537,7 @@ bb: %a.lane.7 = fmul double %v1.lane.0, %v2.lane.1 %a.lane.8 = fmul double %v1.lane.2, %v2.lane.0 - %a.ins.0 = insertelement <9 x double> undef, double %a.lane.0, i32 0 + %a.ins.0 = insertelement <9 x double> zeroinitializer, double %a.lane.0, i32 0 %a.ins.1 = insertelement <9 x double> %a.ins.0, double %a.lane.1, i32 1 %a.ins.2 = insertelement <9 x double> %a.ins.1, double %a.lane.2, i32 2 %a.ins.3 = insertelement <9 x double> %a.ins.2, double %a.lane.3, i32 3 @@ -547,7 +557,7 @@ bb: %b.lane.7 = fmul double %v1.lane.5, %v2.lane.0 %b.lane.8 = fmul double %v1.lane.4, %v2.lane.2 - %b.ins.0 = insertelement <9 x double> undef, double %b.lane.0, i32 0 + %b.ins.0 = insertelement <9 x double> zeroinitializer, double %b.lane.0, i32 0 %b.ins.1 = insertelement <9 x double> %b.ins.0, double %b.lane.1, i32 1 %b.ins.2 = insertelement <9 x double> %b.ins.1, double %b.lane.2, i32 2 %b.ins.3 = insertelement <9 x double> %b.ins.2, double %b.lane.3, i32 3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll index e30f84e4f17b6..b83d35541bbae 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll @@ -23,7 +23,7 @@ define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) { %b1 = extractelement <2 x double> %b, i32 1 %r0 = fadd double %a0, %a1 %r1 = fadd double %b0, %b1 - %r00 = insertelement <2 x double> undef, double 
%r0, i32 0 + %r00 = insertelement <2 x double> zeroinitializer, double %r0, i32 0 %r01 = insertelement <2 x double> %r00, double %r1, i32 1 ret <2 x double> %r01 } @@ -47,7 +47,7 @@ define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) { %r1 = fadd float %a2, %a3 %r2 = fadd float %b0, %b1 %r3 = fadd float %b2, %b3 - %r00 = insertelement <4 x float> undef, float %r0, i32 0 + %r00 = insertelement <4 x float> zeroinitializer, float %r0, i32 0 %r01 = insertelement <4 x float> %r00, float %r1, i32 1 %r02 = insertelement <4 x float> %r01, float %r2, i32 2 %r03 = insertelement <4 x float> %r02, float %r3, i32 3 @@ -67,7 +67,7 @@ define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) { %b1 = extractelement <2 x i64> %b, i32 1 %r0 = add i64 %a0, %a1 %r1 = add i64 %b0, %b1 - %r00 = insertelement <2 x i64> undef, i64 %r0, i32 0 + %r00 = insertelement <2 x i64> zeroinitializer, i64 %r0, i32 0 %r01 = insertelement <2 x i64> %r00, i64 %r1, i32 1 ret <2 x i64> %r01 } @@ -91,7 +91,7 @@ define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) { %r1 = add i32 %a2, %a3 %r2 = add i32 %b0, %b1 %r3 = add i32 %b2, %b3 - %r00 = insertelement <4 x i32> undef, i32 %r0, i32 0 + %r00 = insertelement <4 x i32> zeroinitializer, i32 %r0, i32 0 %r01 = insertelement <4 x i32> %r00, i32 %r1, i32 1 %r02 = insertelement <4 x i32> %r01, i32 %r2, i32 2 %r03 = insertelement <4 x i32> %r02, i32 %r3, i32 3 @@ -129,7 +129,7 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { %r5 = add i16 %b2, %b3 %r6 = add i16 %b4, %b5 %r7 = add i16 %b6, %b7 - %r00 = insertelement <8 x i16> undef, i16 %r0, i32 0 + %r00 = insertelement <8 x i16> zeroinitializer, i16 %r0, i32 0 %r01 = insertelement <8 x i16> %r00, i16 %r1, i32 1 %r02 = insertelement <8 x i16> %r01, i16 %r2, i32 2 %r03 = insertelement <8 x i16> %r02, i16 %r3, i32 3 @@ -204,7 +204,7 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { %r1 = fadd double %b0, %b1 %r2 = fadd double %a2, %a3 %r3 = fadd double %b2, %b3 - %r00 
= insertelement <4 x double> undef, double %r0, i32 0 + %r00 = insertelement <4 x double> zeroinitializer, double %r0, i32 0 %r01 = insertelement <4 x double> %r00, double %r1, i32 1 %r02 = insertelement <4 x double> %r01, double %r2, i32 2 %r03 = insertelement <4 x double> %r02, double %r3, i32 3 @@ -213,16 +213,41 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) { ; PR50392 define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @test_v4f64_partial_swizzle( -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> , <4 x i32> -; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 -; CHECK-NEXT: ret <4 x double> [[R03]] +; SSE-LABEL: @test_v4f64_partial_swizzle( +; SSE-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 +; SSE-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[R0212:%.*]] = insertelement <4 x double> [[TMP4]], double 0.000000e+00, i64 1 +; SSE-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R0212]], double [[R3]], i64 3 +; SSE-NEXT: ret <4 x double> 
[[R03]] +; +; SLM-LABEL: @test_v4f64_partial_swizzle( +; SLM-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 +; SLM-NEXT: [[R0:%.*]] = fadd double [[A0]], [[A1]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> +; SLM-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SLM-NEXT: [[R00:%.*]] = insertelement <4 x double> , double [[R0]], i64 0 +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SLM-NEXT: [[R031:%.*]] = shufflevector <4 x double> [[R00]], <4 x double> [[TMP4]], <4 x i32> +; SLM-NEXT: ret <4 x double> [[R031]] +; +; AVX-LABEL: @test_v4f64_partial_swizzle( +; AVX-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 +; AVX-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; AVX-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; AVX-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] +; AVX-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; AVX-NEXT: [[R0212:%.*]] = insertelement <4 x double> [[TMP4]], double 0.000000e+00, i64 1 +; AVX-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R0212]], double [[R3]], i64 3 +; AVX-NEXT: ret <4 x double> [[R03]] ; %a0 = extractelement <4 x double> %a, i64 0 %a1 = extractelement <4 x double> %a, i64 1 @@ -233,7 +258,7 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b %r0 = fadd double %a0, %a1 %r2 = fadd double %b0, %b1 %r3 = fadd double %b2, %b3 - %r00 = insertelement <4 x double> undef, double %r0, i32 0 + %r00 = insertelement <4 x double> zeroinitializer, 
double %r0, i32 0 %r02 = insertelement <4 x double> %r00, double %r2, i32 2 %r03 = insertelement <4 x double> %r02, double %r3, i32 3 ret <4 x double> %r03 @@ -290,7 +315,7 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) { %r5 = fadd float %a6, %a7 %r6 = fadd float %b4, %b5 %r7 = fadd float %b6, %b7 - %r00 = insertelement <8 x float> undef, float %r0, i32 0 + %r00 = insertelement <8 x float> zeroinitializer, float %r0, i32 0 %r01 = insertelement <8 x float> %r00, float %r1, i32 1 %r02 = insertelement <8 x float> %r01, float %r2, i32 2 %r03 = insertelement <8 x float> %r02, float %r3, i32 3 @@ -340,7 +365,7 @@ define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) { %r1 = add i64 %b0, %b1 %r2 = add i64 %a2, %a3 %r3 = add i64 %b2, %b3 - %r00 = insertelement <4 x i64> undef, i64 %r0, i32 0 + %r00 = insertelement <4 x i64> zeroinitializer, i64 %r0, i32 0 %r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1 %r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2 %r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3 @@ -398,7 +423,7 @@ define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) { %r5 = add i32 %a6, %a7 %r6 = add i32 %b4, %b5 %r7 = add i32 %b6, %b7 - %r00 = insertelement <8 x i32> undef, i32 %r0, i32 0 + %r00 = insertelement <8 x i32> zeroinitializer, i32 %r0, i32 0 %r01 = insertelement <8 x i32> %r00, i32 %r1, i32 1 %r02 = insertelement <8 x i32> %r01, i32 %r2, i32 2 %r03 = insertelement <8 x i32> %r02, i32 %r3, i32 3 @@ -484,7 +509,7 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { %r13 = add i16 %b10, %b11 %r14 = add i16 %b12, %b13 %r15 = add i16 %b14, %b15 - %rv0 = insertelement <16 x i16> undef, i16 %r0 , i32 0 + %rv0 = insertelement <16 x i16> zeroinitializer, i16 %r0 , i32 0 %rv1 = insertelement <16 x i16> %rv0 , i16 %r1 , i32 1 %rv2 = insertelement <16 x i16> %rv1 , i16 %r2 , i32 2 %rv3 = insertelement <16 x i16> %rv2 , i16 %r3 , i32 3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll index 1c56eb2f2ce36..ad0027330868c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll @@ -1093,8 +1093,9 @@ define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 { ; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[A3:%.*]], i32 1 ; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double> ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[RES11:%.*]] = shufflevector <4 x double> zeroinitializer, <4 x double> [[TMP7]], <4 x i32> ; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> -; SSE-NEXT: [[RES31:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> +; SSE-NEXT: [[RES31:%.*]] = shufflevector <4 x double> [[RES11]], <4 x double> [[TMP8]], <4 x i32> ; SSE-NEXT: ret <4 x double> [[RES31]] ; ; AVX-LABEL: @sitofp_4xi32_4f64( @@ -1109,7 +1110,7 @@ define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 { %cvt1 = sitofp i32 %a1 to double %cvt2 = sitofp i32 %a2 to double %cvt3 = sitofp i32 %a3 to double - %res0 = insertelement <4 x double> undef, double %cvt0, i32 0 + %res0 = insertelement <4 x double> zeroinitializer, double %cvt0, i32 0 %res1 = insertelement <4 x double> %res0, double %cvt1, i32 1 %res2 = insertelement <4 x double> %res1, double %cvt2, i32 2 %res3 = insertelement <4 x double> %res2, double %cvt3, i32 3 @@ -1121,30 +1122,31 @@ define <4 x double> @sitofp_with_const_4xi32_4f64(i32 %a2, i32 %a3) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A3:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double> -; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 3 -; CHECK-NEXT: 
[[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> undef, <4 x i32> +; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x double> zeroinitializer, double 1.000000e+00, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[RES31:%.*]] = shufflevector <4 x double> [[RES0]], <4 x double> [[TMP4]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[RES31]] ; %cvt2 = sitofp i32 %a2 to double %cvt3 = sitofp i32 %a3 to double - %res0 = insertelement <4 x double> undef, double 1.0, i32 3 + %res0 = insertelement <4 x double> zeroinitializer, double 1.0, i32 3 %res2 = insertelement <4 x double> %res0, double %cvt2, i32 0 %res3 = insertelement <4 x double> %res2, double %cvt3, i32 1 ret <4 x double> %res3 } -define <4 x double> @sitofp_with_undef_4xi32_4f64(i32 %a2, i32 %a3) #0 { -; CHECK-LABEL: @sitofp_with_undef_4xi32_4f64( +define <4 x double> @sitofp_with_zeroinitializer_4xi32_4f64(i32 %a2, i32 %a3) #0 { +; CHECK-LABEL: @sitofp_with_zeroinitializer_4xi32_4f64( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A3:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> undef, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> zeroinitializer, <4 x double> [[TMP5]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP4]] ; %cvt2 = sitofp i32 %a2 to double %cvt3 = sitofp i32 %a3 to double - %res2 = insertelement <4 x double> undef, double %cvt2, i32 0 + %res2 = insertelement <4 x double> zeroinitializer, double %cvt2, i32 0 %res3 = insertelement <4 x double> %res2, double %cvt3, i32 1 ret <4 x double> %res3 } @@ -1162,7 +1164,7 @@ define <4 x float> @sitofp_4xi32_4f32(i32 %a0, i32 %a1, i32 %a2, i32 
%a3) #0 { %cvt1 = sitofp i32 %a1 to float %cvt2 = sitofp i32 %a2 to float %cvt3 = sitofp i32 %a3 to float - %res0 = insertelement <4 x float> undef, float %cvt0, i32 0 + %res0 = insertelement <4 x float> zeroinitializer, float %cvt0, i32 0 %res1 = insertelement <4 x float> %res0, float %cvt1, i32 1 %res2 = insertelement <4 x float> %res1, float %cvt2, i32 2 %res3 = insertelement <4 x float> %res2, float %cvt3, i32 3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll index c5cdcdc1eb1a5..3c34abcdd36a6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll @@ -13,27 +13,27 @@ define void @test(i1 %arg) { ; CHECK-NEXT: bb279: ; CHECK-NEXT: br label [[BB283:%.*]] ; CHECK: bb283: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ undef, [[EXIT]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ zeroinitializer, [[BB279]] ], [ zeroinitializer, [[EXIT]] ] ; CHECK-NEXT: br label [[BB284:%.*]] ; CHECK: bb284: ; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double> -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef -; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], zeroinitializer ; CHECK-NEXT: br label [[BB21_I:%.*]] ; CHECK: bb21.i: -; CHECK-NEXT: br i1 %arg, label [[BB22_I:%.*]], label [[EXIT]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB22_I:%.*]], label [[EXIT]] ; CHECK: bb22.i: -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> zeroinitializer, [[TMP4]] ; CHECK-NEXT: br 
label [[BB32_I:%.*]] ; CHECK: bb32.i: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ] -; CHECK-NEXT: br i1 %arg, label [[BB32_I]], label [[BB21_I]] +; CHECK-NEXT: br i1 [[ARG]], label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float> ; CHECK-NEXT: br label [[BB283]] ; @@ -41,27 +41,27 @@ bb279: br label %bb283 bb283: - %Av.sroa.8.0 = phi float [ undef, %bb279 ], [ %tmp315, %exit ] - %Av.sroa.5.0 = phi float [ undef, %bb279 ], [ %tmp319, %exit ] - %Av.sroa.3.0 = phi float [ undef, %bb279 ], [ %tmp307, %exit ] - %Av.sroa.0.0 = phi float [ undef, %bb279 ], [ %tmp317, %exit ] + %Av.sroa.8.0 = phi float [ zeroinitializer, %bb279 ], [ %tmp315, %exit ] + %Av.sroa.5.0 = phi float [ zeroinitializer, %bb279 ], [ %tmp319, %exit ] + %Av.sroa.3.0 = phi float [ zeroinitializer, %bb279 ], [ %tmp307, %exit ] + %Av.sroa.0.0 = phi float [ zeroinitializer, %bb279 ], [ %tmp317, %exit ] br label %bb284 bb284: %tmp7.i = fpext float %Av.sroa.3.0 to double - %tmp8.i = fsub double %tmp7.i, undef - %tmp9.i = fsub double %tmp8.i, undef + %tmp8.i = fsub double %tmp7.i, zeroinitializer + %tmp9.i = fsub double %tmp8.i, zeroinitializer %tmp17.i = fpext float %Av.sroa.8.0 to double - %tmp19.i = fsub double %tmp17.i, undef - %tmp20.i = fsub double %tmp19.i, undef + %tmp19.i = fsub double %tmp17.i, zeroinitializer + %tmp20.i = fsub double %tmp19.i, zeroinitializer br label %bb21.i bb21.i: br i1 %arg, label 
%bb22.i, label %exit bb22.i: - %tmp24.i = fadd double undef, %tmp9.i - %tmp26.i = fadd double undef, %tmp20.i + %tmp24.i = fadd double zeroinitializer, %tmp9.i + %tmp26.i = fadd double zeroinitializer, %tmp20.i br label %bb32.i bb32.i: @@ -71,17 +71,17 @@ bb32.i: exit: %tmp303 = fpext float %Av.sroa.0.0 to double - %tmp304 = fmul double %tmp303, undef - %tmp305 = fadd double undef, %tmp304 - %tmp306 = fadd double %tmp305, undef + %tmp304 = fmul double %tmp303, zeroinitializer + %tmp305 = fadd double zeroinitializer, %tmp304 + %tmp306 = fadd double %tmp305, zeroinitializer %tmp307 = fptrunc double %tmp306 to float %tmp311 = fpext float %Av.sroa.5.0 to double %tmp312 = fmul double %tmp311, 0.000000e+00 - %tmp313 = fadd double undef, %tmp312 - %tmp314 = fadd double %tmp313, undef + %tmp313 = fadd double zeroinitializer, %tmp312 + %tmp314 = fadd double %tmp313, zeroinitializer %tmp315 = fptrunc double %tmp314 to float - %tmp317 = fptrunc double undef to float - %tmp319 = fptrunc double undef to float + %tmp317 = fptrunc double zeroinitializer to float + %tmp319 = fptrunc double zeroinitializer to float br label %bb283 } @@ -91,13 +91,13 @@ exit: define <4 x double> @constant_folding() { ; CHECK-LABEL: @constant_folding( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x double> +; CHECK-NEXT: ret <4 x double> ; entry: %t0 = fadd double 1.000000e+00 , 0.000000e+00 %t1 = fadd double 1.000000e+00 , 1.000000e+00 %t2 = fmul double %t0, 1.000000e+00 - %i1 = insertelement <4 x double> undef, double %t2, i32 1 + %i1 = insertelement <4 x double> zeroinitializer, double %t2, i32 1 %t3 = fmul double %t1, 1.000000e+00 %i2 = insertelement <4 x double> %i1, double %t3, i32 0 ret <4 x double> %i2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll index a69849fabcef6..6a479174777b0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -4,26 +4,27 @@ define void @foo(i1 %arg) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = uitofp i16 undef to float -; CHECK-NEXT: [[SUB:%.*]] = fsub float 6.553500e+04, undef +; CHECK-NEXT: [[CONV:%.*]] = uitofp i16 0 to float +; CHECK-NEXT: [[SUB:%.*]] = fsub float 6.553500e+04, 0.000000e+00 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[SUB]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[SUB]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: ; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ] -; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 -; CHECK-NEXT: br i1 %arg, label [[BB3]], label [[BB4:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr null, align 8 +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB3]], label [[BB4:%.*]] ; CHECK: bb4: ; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double> -; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 undef to double -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> , double [[CONV2]], i32 1 +; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 0 to double +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> , double [[CONV2]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> undef, <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> 
[[TMP9]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> , <4 x double> [[TMP15]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]] ; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float> ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]] @@ -33,8 +34,8 @@ define void @foo(i1 %arg) { ; CHECK-NEXT: br label [[BB2]] ; entry: - %conv = uitofp i16 undef to float - %sub = fsub float 6.553500e+04, undef + %conv = uitofp i16 zeroinitializer to float + %sub = fsub float 6.553500e+04, zeroinitializer br label %bb1 bb1: @@ -43,28 +44,28 @@ bb1: bb2: %0 = phi float [ %sub, %bb1 ], [ %9, %bb3 ] %1 = phi float [ %conv, %bb1 ], [ %10, %bb3 ] - %2 = phi float [ undef, %bb1 ], [ %11, %bb3 ] - %3 = phi float [ undef, %bb1 ], [ %12, %bb3 ] - %4 = load double, ptr undef, align 8 + %2 = phi float [ zeroinitializer, %bb1 ], [ %11, %bb3 ] + %3 = phi float [ zeroinitializer, %bb1 ], [ %12, %bb3 ] + %4 = load double, ptr zeroinitializer, align 8 br i1 %arg, label %bb3, label %bb4 bb4: %ext = fpext float %3 to double - %cmp1 = fcmp ogt double undef, %ext - %5 = fptrunc double undef to float + %cmp1 = fcmp ogt double zeroinitializer, %ext + %5 = fptrunc double zeroinitializer to float %sel1 = select i1 %cmp1, float %3, float %5 %ext2 = fpext float %2 to double - %cmp2 = fcmp ogt double undef, %ext2 - %6 = fptrunc double undef to float + %cmp2 = fcmp ogt double zeroinitializer, %ext2 + %6 = fptrunc double zeroinitializer to float %sel2 = select i1 %cmp2, float %2, float %6 %ext3 = fpext float %1 to double - %conv2 = uitofp i16 undef to double + %conv2 = uitofp i16 zeroinitializer to double %add1 = fadd double %4, %conv2 %cmp3 = fcmp ogt double %add1, %ext3 %7 = fptrunc double %add1 to float %sel3 = select i1 %cmp3, float %1, float %7 %ext4 = fpext float %0 to double - %sub1 = fsub double undef, undef + %sub1 = fsub double zeroinitializer, 
zeroinitializer %cmp4 = fcmp ogt double %sub1, %ext4 %8 = fptrunc double %sub1 to float %sel4 = select i1 %cmp4, float %0, float %8 diff --git a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-const-undef.ll b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-const.ll similarity index 81% rename from llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-const-undef.ll rename to llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-const.ll index 48b5145622bdf..a0e3950e49117 100644 --- a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-const-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-const.ll @@ -9,7 +9,8 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> , <4 x float> [[TMP7]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP6]] ; %c0 = extractelement <4 x i32> %c, i32 0 @@ -22,7 +23,7 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) %cmp1 = icmp ne i32 %c1, 0 %s0 = select i1 %cmp0, float %a0, float %b0 %s1 = select i1 %cmp1, float %a1, float %b1 - %ra = insertelement <4 x float> , float %s0, i32 0 + %ra = insertelement <4 x float> , float %s0, i32 0 %rb = insertelement <4 x float> %ra, float %s1, i32 1 ret <4 x float> %rb } diff --git a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll index 
8e3a941932c97..0b896f4b3a36a 100644 --- a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll @@ -32,7 +32,7 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) %s1 = select i1 %cmp1, float %a1, float %b1 %s2 = select i1 %cmp2, float %a2, float %b2 %s3 = select i1 %cmp3, float %a3, float %b3 - %ra = insertelement <4 x float> undef, float %s0, i32 0 + %ra = insertelement <4 x float> zeroinitializer, float %s0, i32 0 %rb = insertelement <4 x float> %ra, float %s1, i32 1 %rc = insertelement <4 x float> %rb, float %s2, i32 2 %rd = insertelement <4 x float> %rc, float %s3, i32 3 @@ -43,7 +43,8 @@ define <8 x float> @simple_select2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) ; CHECK-LABEL: @simple_select2( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> zeroinitializer, <8 x float> [[TMP4]], <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP3]] ; %c0 = extractelement <4 x i32> %c, i32 0 @@ -66,7 +67,7 @@ define <8 x float> @simple_select2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) %s1 = select i1 %cmp1, float %a1, float %b1 %s2 = select i1 %cmp2, float %a2, float %b2 %s3 = select i1 %cmp3, float %a3, float %b3 - %ra = insertelement <8 x float> undef, float %s0, i32 0 + %ra = insertelement <8 x float> zeroinitializer, float %s0, i32 0 %rb = insertelement <8 x float> %ra, float %s1, i32 2 %rc = insertelement <8 x float> %rb, float %s2, i32 4 %rd = insertelement <8 x float> %rc, float %s3, i32 7 @@ -98,7 +99,7 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; 
THRESHOLD-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] ; THRESHOLD-NEXT: [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]] ; THRESHOLD-NEXT: [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]] -; THRESHOLD-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0 +; THRESHOLD-NEXT: [[RA:%.*]] = insertelement <4 x float> zeroinitializer, float [[S0]], i32 0 ; THRESHOLD-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1 ; THRESHOLD-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[S2]], i32 2 ; THRESHOLD-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 @@ -113,7 +114,7 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; THRESHOLD-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]] ; THRESHOLD-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]] ; THRESHOLD-NEXT: call void @llvm.assume(i1 [[QI]]) -; THRESHOLD-NEXT: ret <4 x float> undef +; THRESHOLD-NEXT: ret <4 x float> zeroinitializer ; ; NOTHRESHOLD-LABEL: @simple_select_eph( ; NOTHRESHOLD-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 @@ -136,7 +137,7 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; NOTHRESHOLD-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] ; NOTHRESHOLD-NEXT: [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]] ; NOTHRESHOLD-NEXT: [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]] -; NOTHRESHOLD-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0 +; NOTHRESHOLD-NEXT: [[RA:%.*]] = insertelement <4 x float> zeroinitializer, float [[S0]], i32 0 ; NOTHRESHOLD-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1 ; NOTHRESHOLD-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[S2]], i32 2 ; NOTHRESHOLD-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 @@ -149,7 +150,7 @@ define <4 x float> @simple_select_eph(<4 x 
float> %a, <4 x float> %b, <4 x i32> ; NOTHRESHOLD-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]] ; NOTHRESHOLD-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]] ; NOTHRESHOLD-NEXT: call void @llvm.assume(i1 [[QI]]) -; NOTHRESHOLD-NEXT: ret <4 x float> undef +; NOTHRESHOLD-NEXT: ret <4 x float> zeroinitializer ; ; MINTREESIZE-LABEL: @simple_select_eph( ; MINTREESIZE-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 @@ -176,7 +177,7 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; MINTREESIZE-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] ; MINTREESIZE-NEXT: [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]] ; MINTREESIZE-NEXT: [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]] -; MINTREESIZE-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0 +; MINTREESIZE-NEXT: [[RA:%.*]] = insertelement <4 x float> zeroinitializer, float [[S0]], i32 0 ; MINTREESIZE-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1 ; MINTREESIZE-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[S2]], i32 2 ; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 @@ -193,7 +194,7 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; MINTREESIZE-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]] ; MINTREESIZE-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]] ; MINTREESIZE-NEXT: call void @llvm.assume(i1 [[QI]]) -; MINTREESIZE-NEXT: ret <4 x float> undef +; MINTREESIZE-NEXT: ret <4 x float> zeroinitializer ; %c0 = extractelement <4 x i32> %c, i32 0 %c1 = extractelement <4 x i32> %c, i32 1 @@ -215,7 +216,7 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %s1 = select i1 %cmp1, float %a1, float %b1 %s2 = select i1 %cmp2, float %a2, float %b2 %s3 = select i1 %cmp3, float %a3, float %b3 - %ra = insertelement <4 x float> undef, float %s0, i32 0 + %ra = insertelement <4 x float> 
zeroinitializer, float %s0, i32 0 %rb = insertelement <4 x float> %ra, float %s1, i32 1 %rc = insertelement <4 x float> %rb, float %s2, i32 2 %rd = insertelement <4 x float> %rc, float %s3, i32 3 @@ -228,7 +229,7 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %q6 = fadd float %q4, %q5 %qi = fcmp olt float %q6, %q5 call void @llvm.assume(i1 %qi) - ret <4 x float> undef + ret <4 x float> zeroinitializer } ; Insert in an order different from the vector indices to make sure it @@ -260,7 +261,7 @@ define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float %s1 = select i1 %cmp1, float %a1, float %b1 %s2 = select i1 %cmp2, float %a2, float %b2 %s3 = select i1 %cmp3, float %a3, float %b3 - %ra = insertelement <4 x float> undef, float %s0, i32 2 + %ra = insertelement <4 x float> zeroinitializer, float %s0, i32 2 %rb = insertelement <4 x float> %ra, float %s1, i32 1 %rc = insertelement <4 x float> %rb, float %s2, i32 0 %rd = insertelement <4 x float> %rc, float %s3, i32 3 @@ -298,7 +299,7 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32 %s1 = select i1 %cmp1, float %a1, float %b1 %s2 = select i1 %cmp2, float %a2, float %b2 %s3 = select i1 %cmp3, float %a3, float %b3 - %ra = insertelement <4 x float> undef, float %s0, i32 0 + %ra = insertelement <4 x float> zeroinitializer, float %s0, i32 0 %rb = insertelement <4 x float> %ra, float %s1, i32 1 %rc = insertelement <4 x float> %rb, float %s2, i32 2 %rd = insertelement <4 x float> %rc, float %s3, i32 3 @@ -319,9 +320,10 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> 
[[TMP5]], <2 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[RB2:%.*]] = shufflevector <4 x float> zeroinitializer, <4 x float> [[TMP11]], <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> zeroinitializer, <4 x float> [[TMP12]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 @@ -344,9 +346,9 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x %s1 = select i1 %cmp1, float %a1, float %b1 %s2 = select i1 %cmp2, float %a2, float %b2 %s3 = select i1 %cmp3, float %a3, float %b3 - %ra = insertelement <4 x float> undef, float %s0, i32 0 + %ra = insertelement <4 x float> zeroinitializer, float %s0, i32 0 %rb = insertelement <4 x float> %ra, float %s1, i32 1 - %rc = insertelement <4 x float> undef, float %s2, i32 2 + %rc = insertelement <4 x float> zeroinitializer, float %s2, i32 2 %rd = insertelement <4 x float> %rc, float %s3, i32 3 ret <4 x float> %rd } @@ -359,7 +361,7 @@ define <4 x i32> @reconstruct(<4 x i32> %c) #0 { ; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 ; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 ; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 -; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0 +; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[C0]], i32 0 ; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1 ; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2 ; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3 @@ -369,7 +371,7 @@ define <4 x i32> @reconstruct(<4 x i32> %c) #0 { %c1 = extractelement <4 x i32> %c, i32 
1 %c2 = extractelement <4 x i32> %c, i32 2 %c3 = extractelement <4 x i32> %c, i32 3 - %ra = insertelement <4 x i32> undef, i32 %c0, i32 0 + %ra = insertelement <4 x i32> zeroinitializer, i32 %c0, i32 0 %rb = insertelement <4 x i32> %ra, i32 %c1, i32 1 %rc = insertelement <4 x i32> %rb, i32 %c2, i32 2 %rd = insertelement <4 x i32> %rc, i32 %c3, i32 3 @@ -392,13 +394,13 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> % %cmp1 = icmp ne i32 %c1, 0 %s0 = select i1 %cmp0, float %a0, float %b0 %s1 = select i1 %cmp1, float %a1, float %b1 - %ra = insertelement <2 x float> undef, float %s0, i32 0 + %ra = insertelement <2 x float> zeroinitializer, float %s0, i32 0 %rb = insertelement <2 x float> %ra, float %s1, i32 1 ret <2 x float> %rb } ; Make sure when we construct partial vectors, we don't keep -; re-visiting the insertelement chains starting with undef +; re-visiting the insertelement chains starting with zeroinitializer ; (low cost threshold needed to force this to happen) define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_partial_vector( @@ -408,16 +410,16 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 ; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[C0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> zeroinitializer, float [[A0]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = 
insertelement <2 x float> [[TMP4]], float [[A1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> zeroinitializer, float [[B0]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 -; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 0 +; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> zeroinitializer, float [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 ; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP10]], i32 1 ; CHECK-NEXT: ret <4 x float> [[RB]] @@ -428,16 +430,16 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, %a1 = extractelement <4 x float> %a, i32 1 %b0 = extractelement <4 x float> %b, i32 0 %b1 = extractelement <4 x float> %b, i32 1 - %1 = insertelement <2 x i32> undef, i32 %c0, i32 0 + %1 = insertelement <2 x i32> zeroinitializer, i32 %c0, i32 0 %2 = insertelement <2 x i32> %1, i32 %c1, i32 1 %3 = icmp ne <2 x i32> %2, zeroinitializer - %4 = insertelement <2 x float> undef, float %a0, i32 0 + %4 = insertelement <2 x float> zeroinitializer, float %a0, i32 0 %5 = insertelement <2 x float> %4, float %a1, i32 1 - %6 = insertelement <2 x float> undef, float %b0, i32 0 + %6 = insertelement <2 x float> zeroinitializer, float %b0, i32 0 %7 = insertelement <2 x float> %6, float %b1, i32 1 %8 = select <2 x i1> %3, <2 x float> %5, <2 x float> %7 %9 = extractelement <2 x float> %8, i32 0 - %ra = insertelement <4 x float> undef, float %9, i32 0 + %ra = insertelement <4 x float> zeroinitializer, float %9, i32 0 %10 = extractelement <2 x float> %8, i32 1 %rb = insertelement <4 x float> %ra, float %10, i32 1 ret <4 x float> %rb @@ -453,7 
+455,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { %a0 = extractelement <4 x float> %a, i32 0 %b0 = extractelement <4 x float> %b, i32 0 %c0 = fadd float %a0, %b0 - %v0 = insertelement <4 x float> undef, float %c0, i32 0 + %v0 = insertelement <4 x float> zeroinitializer, float %c0, i32 0 %a1 = extractelement <4 x float> %a, i32 1 %b1 = extractelement <4 x float> %b, i32 1 %c1 = fadd float %a1, %b1 @@ -488,7 +490,7 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { %a3 = extractelement <4 x float> %a, i32 3 %b3 = extractelement <4 x float> %b, i32 3 %c3 = fadd float %a3, %b3 - %v0 = insertelement <4 x float> undef, float %c0, i32 0 + %v0 = insertelement <4 x float> zeroinitializer, float %c0, i32 0 %v1 = insertelement <4 x float> %v0, float %c1, i32 1 %v2 = insertelement <4 x float> %v1, float %c2, i32 2 %v3 = insertelement <4 x float> %v2, float %c3, i32 3 @@ -511,7 +513,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { %t2 = fadd double %y , 2.000000e+00 %t3 = fadd double %z , 3.000000e+00 %t4 = fmul double %t0, 1.000000e+00 - %i1 = insertelement <4 x double> undef, double %t4, i32 3 + %i1 = insertelement <4 x double> zeroinitializer, double %t4, i32 3 %t5 = fmul double %t1, 1.000000e+00 %i2 = insertelement <4 x double> %i1, double %t5, i32 2 %t6 = fmul double %t2, 1.000000e+00 @@ -550,7 +552,7 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr %vecext20 = extractelement <8 x float> %a, i32 7 %vecext21 = extractelement <8 x float> %b, i32 7 %add22 = fadd float %vecext20, %vecext21 - %vecinit.i = insertelement <8 x float> undef, float %add, i32 0 + %vecinit.i = insertelement <8 x float> zeroinitializer, float %add, i32 0 %vecinit1.i = insertelement <8 x float> %vecinit.i, float %add4, i32 1 %vecinit2.i = insertelement <8 x float> %vecinit1.i, float %add7, i32 2 %vecinit3.i = insertelement <8 x float> %vecinit2.i, float %add10, i32 3 From 
c7ea4c18afa84875ac22b2c98930c793eefd24b2 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Fri, 17 Jan 2025 18:55:24 +0100 Subject: [PATCH 42/88] [clang][bytecode] Revisit global variables separately (#123358) Call `EvaluateAsInitializer()` explicitly here, so we don't abort the evaluation of the `DeflRefExpr` just because the initializer of that global variable failed. --- clang/lib/AST/ByteCode/Compiler.cpp | 14 +++++++++++++- clang/test/AST/ByteCode/cxx98.cpp | 5 +++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index fca8518575594..3ef2b0858e667 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -6210,8 +6210,20 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { return revisit(VD); if ((VD->hasGlobalStorage() || VD->isStaticDataMember()) && - typeShouldBeVisited(VD->getType())) + typeShouldBeVisited(VD->getType())) { + if (const Expr *Init = VD->getAnyInitializer(); + Init && !Init->isValueDependent()) { + // Whether or not the evaluation is successul doesn't really matter + // here -- we will create a global variable in any case, and that + // will have the state of initializer evaluation attached. + APValue V; + SmallVector Notes; + (void)Init->EvaluateAsInitializer(V, Ctx.getASTContext(), VD, Notes, + true); + return this->visitDeclRef(D, E); + } return revisit(VD); + } // FIXME: The evaluateValue() check here is a little ridiculous, since // it will ultimately call into Context::evaluateAsInitializer(). 
In diff --git a/clang/test/AST/ByteCode/cxx98.cpp b/clang/test/AST/ByteCode/cxx98.cpp index 20f98d33c31c4..c17049b01c1da 100644 --- a/clang/test/AST/ByteCode/cxx98.cpp +++ b/clang/test/AST/ByteCode/cxx98.cpp @@ -59,3 +59,8 @@ struct PR65784s{ int *ptr; } const PR65784[] = {(int *)""}; PR65784s PR65784f() { return *PR65784; } + +const int b = 1 / 0; // both-warning {{division by zero is undefined}} \ + // both-note {{declared here}} +_Static_assert(b, ""); // both-error {{not an integral constant expression}} \ + // both-note {{initializer of 'b' is not a constant expression}} From 8815c505be90edf0168e931d77f2b68e393031d3 Mon Sep 17 00:00:00 2001 From: JoelWee <32009741+JoelWee@users.noreply.github.com> Date: Fri, 17 Jan 2025 17:59:18 +0000 Subject: [PATCH 43/88] [MLIR] Allow setting call stack limit for SourceMgrDiagnosticHandler (#123373) Otherwise for deeply nested code, the callstack will always be truncated --- mlir/include/mlir/IR/Diagnostics.h | 4 +++- mlir/lib/IR/Diagnostics.cpp | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/IR/Diagnostics.h b/mlir/include/mlir/IR/Diagnostics.h index 8429325412dc9..36c433c63b26d 100644 --- a/mlir/include/mlir/IR/Diagnostics.h +++ b/mlir/include/mlir/IR/Diagnostics.h @@ -578,6 +578,9 @@ class SourceMgrDiagnosticHandler : public ScopedDiagnosticHandler { void emitDiagnostic(Location loc, Twine message, DiagnosticSeverity kind, bool displaySourceLine = true); + /// Set the maximum depth that a call stack will be printed. Defaults to 10. + void setCallStackLimit(unsigned limit); + protected: /// Emit the given diagnostic with the held source manager. void emitDiagnostic(Diagnostic &diag); @@ -605,7 +608,6 @@ class SourceMgrDiagnosticHandler : public ScopedDiagnosticHandler { std::optional findLocToShow(Location loc); /// The maximum depth that a call stack will be printed. - /// TODO: This should be a tunable flag. 
unsigned callStackLimit = 10; std::unique_ptr impl; diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp index 7eb3d5bcd07f1..19b32120f5890 100644 --- a/mlir/lib/IR/Diagnostics.cpp +++ b/mlir/lib/IR/Diagnostics.cpp @@ -519,6 +519,10 @@ void SourceMgrDiagnosticHandler::emitDiagnostic(Diagnostic &diag) { } } +void SourceMgrDiagnosticHandler::setCallStackLimit(unsigned limit) { + callStackLimit = limit; +} + /// Get a memory buffer for the given file, or nullptr if one is not found. const llvm::MemoryBuffer * SourceMgrDiagnosticHandler::getBufferForFile(StringRef filename) { From fbea21aa52f96fc12e19fa4b1063209bc4d19f99 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 17 Jan 2025 10:06:44 -0800 Subject: [PATCH 44/88] [AMDGPU] Add test for VALU hoisiting from WWM region. NFC. (#123234) The test demonstraits a suboptimal VALU hoisting from a WWM region. As a result we have 2 WWM regions instead of one. --- llvm/test/CodeGen/AMDGPU/licm-wwm.mir | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/licm-wwm.mir diff --git a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir new file mode 100644 index 0000000000000..fc20674971a71 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir @@ -0,0 +1,46 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s + +# Machine LICM may hoist an intruction from a WWM region, which will force SI-WQM pass +# to create a second WWM region. This is an unwanted hoisting. 
+ +--- +name: licm_move_wwm +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: licm_move_wwm + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_32 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GCN-NEXT: $exec_lo = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]] + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[ENTER_STRICT_WWM1:%[0-9]+]]:sreg_32 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec + ; GCN-NEXT: $exec_lo = EXIT_STRICT_WWM [[ENTER_STRICT_WWM1]] + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[V_READFIRSTLANE_B32_]] + ; GCN-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + S_BRANCH %bb.1 + + bb.1: + %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %1:sreg_32 = V_READFIRSTLANE_B32 killed %0:vgpr_32, implicit $exec + early-clobber %2:sreg_32 = STRICT_WWM killed %1:sreg_32, implicit $exec + $exec_lo = S_OR_B32 $exec_lo, %2, implicit-def $scc + S_CBRANCH_EXECNZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... From f12e0c9c3a45065543bea89c5545e26690ac3e59 Mon Sep 17 00:00:00 2001 From: Prashanth Date: Fri, 17 Jan 2025 23:49:22 +0530 Subject: [PATCH 45/88] [libc][docs] Add sys/stat page to the status of implementations docs (#122997) These changes ensure that the sys/stat header is documented properly with respect to the issue ( #122006 ) . 
--- libc/docs/CMakeLists.txt | 1 + libc/docs/headers/index.rst | 1 + libc/utils/docgen/sys/stat.yaml | 118 ++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+) create mode 100644 libc/utils/docgen/sys/stat.yaml diff --git a/libc/docs/CMakeLists.txt b/libc/docs/CMakeLists.txt index 9974769ca7eb3..f88d7c27f9f6b 100644 --- a/libc/docs/CMakeLists.txt +++ b/libc/docs/CMakeLists.txt @@ -55,6 +55,7 @@ if (SPHINX_FOUND) strings sys/mman sys/resource + sys/stat sys/time sys/wait termios diff --git a/libc/docs/headers/index.rst b/libc/docs/headers/index.rst index 64f84ef09cc35..858b2142defa9 100644 --- a/libc/docs/headers/index.rst +++ b/libc/docs/headers/index.rst @@ -28,6 +28,7 @@ Implementation Status strings sys/mman sys/resource + sys/stat sys/time sys/wait termios diff --git a/libc/utils/docgen/sys/stat.yaml b/libc/utils/docgen/sys/stat.yaml new file mode 100644 index 0000000000000..86dc84a1e06d2 --- /dev/null +++ b/libc/utils/docgen/sys/stat.yaml @@ -0,0 +1,118 @@ +macros: + S_IFMT: + in-latest-posix: '' + S_IFBLK: + in-latest-posix: '' + S_IFCHR: + in-latest-posix: '' + S_IFIFO: + in-latest-posix: '' + S_IFREG: + in-latest-posix: '' + S_IFDIR: + in-latest-posix: '' + S_IFLNK: + in-latest-posix: '' + S_IFSOCK: + in-latest-posix: '' + st_atime: + in-latest-posix: '' + st_ctime: + in-latest-posix: '' + st_mtime: + in-latest-posix: '' + UTIME_NOW: + in-latest-posix: '' + UTIME_OMIT: + in-latest-posix: '' + + S_IRWXU: + in-latest-posix: '' + S_IRUSR: + in-latest-posix: '' + S_IWUSR: + in-latest-posix: '' + S_IXUSR: + in-latest-posix: '' + S_IRWXG: + in-latest-posix: '' + S_IRGRP: + in-latest-posix: '' + S_IWGRP: + in-latest-posix: '' + S_IXGRP: + in-latest-posix: '' + + S_IRWXO: + in-latest-posix: '' + S_IROTH: + in-latest-posix: '' + S_IWOTH: + in-latest-posix: '' + S_IXOTH: + in-latest-posix: '' + S_ISUID: + in-latest-posix: '' + S_ISGID: + in-latest-posix: '' + S_ISVTX: + in-latest-posix: '' + + S_ISBLK: + in-latest-posix: '' + S_ISCHR: + 
in-latest-posix: '' + S_ISDIR: + in-latest-posix: '' + S_ISFIFO: + in-latest-posix: '' + S_ISREG: + in-latest-posix: '' + S_ISLNK: + in-latest-posix: '' + S_ISSOCK: + in-latest-posix: '' + + S_TYPEISMQ: + in-latest-posix: '' + S_TYPEISSEM: + in-latest-posix: '' + S_TYPEISSHM: + in-latest-posix: '' + + S_TYPEISTMO: + in-latest-posix: '' + +functions: + chmod: + in-latest-posix: '' + fchmod: + in-latest-posix: '' + fchmodat: + in-latest-posix: '' + fstat: + in-latest-posix: '' + fstatat: + in-latest-posix: '' + futimens: + in-latest-posix: '' + lstat: + in-latest-posix: '' + mkdir: + in-latest-posix: '' + mkdirat: + in-latest-posix: '' + mkfifo: + in-latest-posix: '' + mkfifoat: + in-latest-posix: '' + mknod: + in-latest-posix: '' + mknodat: + in-latest-posix: '' + stat: + in-latest-posix: '' + umask: + in-latest-posix: '' + utimensat: + in-latest-posix: '' \ No newline at end of file From 1c3c65590d1635ab1b50c89dc8379c42a5d535bd Mon Sep 17 00:00:00 2001 From: alx32 <103613512+alx32@users.noreply.github.com> Date: Fri, 17 Jan 2025 10:48:32 -0800 Subject: [PATCH 46/88] [lld-macho] Document '-icf' flag options (#123372) Adding the `safe_thunks` option in `Options.td` as it was missing there - mentioned by @Colibrow in https://github.com/llvm/llvm-project/pull/106573 Also documenting what the various options mean. Help now looks like this: ``` .......... --error-limit= Maximum number of errors to print before exiting (default: 20) --help-hidden Display help for hidden options --icf=[none,safe,safe_thunks,all] Set level for identical code folding (default: none). Possible values: none - Disable ICF safe - Only folds non-address significant functions (as described by `__addrsig` section) safe_thunks - Like safe, but replaces address-significant functions with thunks all - Fold all identical functions --ignore-auto-link-option= Ignore a single auto-linked library or framework. Useful to ignore invalid options that ld64 ignores --irpgo-profile-sort= Deprecated. 
Please use --irpgo-profile and --bp-startup-sort=function .......... ``` --- lld/MachO/Options.td | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 4b1e9e4391070..39191af7dc169 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -82,8 +82,12 @@ def print_dylib_search: Flag<["--"], "print-dylib-search">, HelpText<"Print which paths lld searched when trying to find dylibs">, Group; def icf_eq: Joined<["--"], "icf=">, - HelpText<"Set level for identical code folding (default: none)">, - MetaVarName<"[none,safe,all]">, + HelpText<"Set level for identical code folding (default: none). Possible values:\n" + " none - Disable ICF\n" + " safe - Only folds non-address significant functions (as described by `__addrsig` section)\n" + " safe_thunks - Like safe, but replaces address-significant functions with thunks\n" + " all - Fold all identical functions">, + MetaVarName<"[none,safe,safe_thunks,all]">, Group; def keep_icf_stabs: Joined<["--"], "keep-icf-stabs">, HelpText<"Generate STABS entries for symbols folded by ICF. These entries can then be used by dsymutil to discover the address range where folded symbols are located.">, From 2dc5682dacab2dbb52a771746fdede0e938fc6e9 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 17 Jan 2025 19:16:32 +0000 Subject: [PATCH 47/88] Verifier: Add check for DICompositeType elements being null Came up recently with some nodebug case on codeview, that caused a null entry in elements and crashed LLVM. 
Original clang fix to avoid generating IR like this: 504dd577675e8c85cdc8525990a7c8b517a38a89 --- llvm/lib/IR/Verifier.cpp | 2 ++ llvm/test/Verifier/dicompositetype-elements-null.ll | 6 ++++++ 2 files changed, 8 insertions(+) create mode 100644 llvm/test/Verifier/dicompositetype-elements-null.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 7b6f7b5aa6171..00280dbe5300b 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1319,6 +1319,8 @@ void Verifier::visitDICompositeType(const DICompositeType &N) { unsigned DIBlockByRefStruct = 1 << 4; CheckDI((N.getFlags() & DIBlockByRefStruct) == 0, "DIBlockByRefStruct on DICompositeType is no longer supported", &N); + CheckDI(llvm::all_of(N.getElements(), [](const DINode *N) { return N; }), + "DISubprogram contains null entry in `elements` field", &N); if (N.isVector()) { const DINodeArray Elements = N.getElements(); diff --git a/llvm/test/Verifier/dicompositetype-elements-null.ll b/llvm/test/Verifier/dicompositetype-elements-null.ll new file mode 100644 index 0000000000000..c0aca47852bac --- /dev/null +++ b/llvm/test/Verifier/dicompositetype-elements-null.ll @@ -0,0 +1,6 @@ +; RUN: not opt -S < %s 2>&1 | FileCheck %s + +!named = !{!0} +; CHECK: DISubprogram contains null entry in `elements` field +!0 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", elements: !1) +!1 = !{null} From bbd871e2baad2e74dbde202823b3439d2a96d3f8 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 17 Jan 2025 14:22:40 -0500 Subject: [PATCH 48/88] [libc++] Don't implement before C++23 (#123130) https://github.com/llvm/llvm-project/pull/95498 implemented a libc++ extension where would forward to even before C++23. Unfortunately, this was found to be a breaking change (with fairly widespread impact) since that changes whether _Atomic(T) is a C style atomic or std::atomic. In principle, this can even be an ABI break. 
We generally don't implement extensions in libc++ because they cause so many problems, and that extension had been accepted because it was deemed pretty small and only a quality of life improvement. Since it has widespread impact on valid C++20 (and before) code, this patch removes the extension before we ship it in any public release. --- libcxx/include/atomic | 10 +++++++ libcxx/include/stdatomic.h | 10 +++++-- ...compatible_with_stdatomic.compile.pass.cpp | 11 ++++---- .../incompatible_with_stdatomic.verify.cpp | 22 +++++++++++++++ .../dont_hijack_header.compile.pass.cpp | 24 ++++++++++++++++ .../dont_hijack_header.cxx23.compile.pass.cpp | 28 +++++++++++++++++++ 6 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 libcxx/test/libcxx/atomics/atomics.syn/incompatible_with_stdatomic.verify.cpp create mode 100644 libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.compile.pass.cpp create mode 100644 libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.cxx23.compile.pass.cpp diff --git a/libcxx/include/atomic b/libcxx/include/atomic index 80f9e437bfaab..75af5de33ca4c 100644 --- a/libcxx/include/atomic +++ b/libcxx/include/atomic @@ -592,6 +592,16 @@ template #else # include <__config> +# if defined(_LIBCPP_STDATOMIC_H) || defined(kill_dependency) || defined(atomic_load) +# define _LIBCPP_STDATOMIC_H_HAS_DEFINITELY_BEEN_INCLUDED 1 +# else +# define _LIBCPP_STDATOMIC_H_HAS_DEFINITELY_BEEN_INCLUDED 0 +# endif + +# if _LIBCPP_STD_VER < 23 && _LIBCPP_STDATOMIC_H_HAS_DEFINITELY_BEEN_INCLUDED +# error is incompatible with before C++23. Please compile with -std=c++23. 
+# endif + # include <__atomic/aliases.h> # include <__atomic/atomic.h> # include <__atomic/atomic_flag.h> diff --git a/libcxx/include/stdatomic.h b/libcxx/include/stdatomic.h index a0b46e3b7bc17..2991030eee456 100644 --- a/libcxx/include/stdatomic.h +++ b/libcxx/include/stdatomic.h @@ -126,7 +126,7 @@ using std::atomic_signal_fence // see below # pragma GCC system_header # endif -# if defined(__cplusplus) +# if defined(__cplusplus) && _LIBCPP_STD_VER >= 23 # include # include @@ -231,13 +231,17 @@ using std::atomic_store_explicit _LIBCPP_USING_IF_EXISTS; using std::atomic_signal_fence _LIBCPP_USING_IF_EXISTS; using std::atomic_thread_fence _LIBCPP_USING_IF_EXISTS; -# else +# elif defined(_LIBCPP_COMPILER_CLANG_BASED) +// Before C++23, we include the next on the path to avoid hijacking +// the header. We do this because Clang has historically shipped a +// header that would be available in all Standard modes, and we don't want to +// break that use case. # if __has_include_next() # include_next # endif -# endif // defined(__cplusplus) +# endif // defined(__cplusplus) && _LIBCPP_STD_VER >= 23 #endif // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_STDATOMIC_H diff --git a/libcxx/test/libcxx/atomics/atomics.syn/compatible_with_stdatomic.compile.pass.cpp b/libcxx/test/libcxx/atomics/atomics.syn/compatible_with_stdatomic.compile.pass.cpp index 323072da14463..30e9672a25683 100644 --- a/libcxx/test/libcxx/atomics/atomics.syn/compatible_with_stdatomic.compile.pass.cpp +++ b/libcxx/test/libcxx/atomics/atomics.syn/compatible_with_stdatomic.compile.pass.cpp @@ -7,15 +7,16 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // XFAIL: FROZEN-CXX03-HEADERS-FIXME -// This test verifies that redirects to . As an extension, -// libc++ enables this redirection even before C++23. 
+// This test verifies that redirects to . -// Ordinarily, can be included after , but including it -// first doesn't work because its macros break . Verify that -// can be included first. +// Before C++23, can be included after , but including it +// first doesn't work because its macros break . Fixing that is the point +// of the C++23 change that added to C++. Thus, this test verifies +// that can be included first. #include #include diff --git a/libcxx/test/libcxx/atomics/atomics.syn/incompatible_with_stdatomic.verify.cpp b/libcxx/test/libcxx/atomics/atomics.syn/incompatible_with_stdatomic.verify.cpp new file mode 100644 index 0000000000000..ca092d9c60275 --- /dev/null +++ b/libcxx/test/libcxx/atomics/atomics.syn/incompatible_with_stdatomic.verify.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: no-threads +// REQUIRES: c++03 || c++11 || c++14 || c++17 || c++20 + +// This test ensures that we issue a reasonable diagnostic when including after +// has been included. Before C++23, this otherwise leads to obscure errors +// because may try to redefine things defined by . + +// Ignore additional weird errors that happen when the two headers are mixed. 
+// ADDITIONAL_COMPILE_FLAGS: -Xclang -verify-ignore-unexpected=error -Xclang -verify-ignore-unexpected=warning + +#include +#include + +// expected-error@*:* {{ is incompatible with before C++23.}} diff --git a/libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.compile.pass.cpp b/libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.compile.pass.cpp new file mode 100644 index 0000000000000..6df80daf9414e --- /dev/null +++ b/libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.compile.pass.cpp @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: no-threads + +// This test ensures that we don't hijack the header (e.g. by providing +// an empty header) even when compiling before C++23, since some users were using the +// Clang or platform provided header before libc++ added its own. + +// On GCC, the compiler-provided is not C++ friendly, so including +// doesn't work at all if we don't use the provided by libc++ in C++23 and above. 
+// XFAIL: (c++11 || c++14 || c++17 || c++20) && gcc + +#include + +void f() { + atomic_int i; // just make sure the header isn't empty + (void)i; +} diff --git a/libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.cxx23.compile.pass.cpp b/libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.cxx23.compile.pass.cpp new file mode 100644 index 0000000000000..a8a99e6937f31 --- /dev/null +++ b/libcxx/test/libcxx/atomics/stdatomic.h.syn/dont_hijack_header.cxx23.compile.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: no-threads + +// This test verifies that DOES NOT redirect to before C++23, +// since doing so is a breaking change. Several things can break when that happens, +// because the type of _Atomic(T) changes from _Atomic(T) to std::atomic. +// +// For example, redeclarations can become invalid depending on whether they +// have been declared with in scope or not. + +// REQUIRES: c++03 || c++11 || c++14 || c++17 || c++20 + +// On GCC, the compiler-provided is not C++ friendly, so including +// doesn't work at all if we don't use the provided by libc++ in C++23 and above. 
+// XFAIL: (c++11 || c++14 || c++17 || c++20) && gcc + +#include +#include +#include + +static_assert(!std::is_same<_Atomic(int), std::atomic >::value, ""); From 58a70dffcc096b54537aa99a045346ee28ad2592 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 17 Jan 2025 11:27:39 -0800 Subject: [PATCH 49/88] [msan] Add debugging for handleUnknownIntrinsic (#123381) This adds an experimental flag, msan-dump-strict-intrinsics (modeled after msan-dump-strict-instructions), which prints out any intrinsics that are heuristically handled. Additionally, MSan will print out heuristically handled intrinsics when -debug is passed as a flag in debug builds. MSan's intrinsic handling can be broken down into: 1) special cases (usually highly accurate) 2) heuristic handling (sometimes erroneous) 3) not handled This patch's -msan-dump-strict-intrinsics is intended to help debug Case 2. Case 3) (which includes all the heuristics that are not handled by special cases nor heuristics) can be debugged using the existing -msan-dump-strict-instructions. --- .../Instrumentation/MemorySanitizer.cpp | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 0169320deae46..6daee7a3b6e81 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -318,6 +318,13 @@ static cl::opt ClDumpStrictInstructions( cl::desc("print out instructions with default strict semantics"), cl::Hidden, cl::init(false)); +static cl::opt ClDumpStrictIntrinsics( + "msan-dump-strict-intrinsics", + cl::desc("Prints 'unknown' intrinsics that were handled heuristically. 
" + "Use -msan-dump-strict-instructions to print intrinsics that " + "could not be handled exactly nor heuristically."), + cl::Hidden, cl::init(false)); + static cl::opt ClInstrumentationWithCallThreshold( "msan-instrumentation-with-call-threshold", cl::desc( @@ -3014,7 +3021,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// /// We special-case intrinsics where this approach fails. See llvm.bswap /// handling as an example of that. - bool handleUnknownIntrinsic(IntrinsicInst &I) { + bool handleUnknownIntrinsicUnlogged(IntrinsicInst &I) { unsigned NumArgOperands = I.arg_size(); if (NumArgOperands == 0) return false; @@ -3040,6 +3047,18 @@ struct MemorySanitizerVisitor : public InstVisitor { return false; } + bool handleUnknownIntrinsic(IntrinsicInst &I) { + if (handleUnknownIntrinsicUnlogged(I)) { + if (ClDumpStrictIntrinsics) + dumpInst(I); + + LLVM_DEBUG(dbgs() << "UNKNOWN INTRINSIC HANDLED HEURISTICALLY: " << I + << "\n"); + return true; + } else + return false; + } + void handleInvariantGroup(IntrinsicInst &I) { setShadow(&I, getShadow(&I, 0)); setOrigin(&I, getOrigin(&I, 0)); From 6716ce8b641f0e42e2343e1694ee578b027be0c4 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 17 Jan 2025 19:35:57 +0000 Subject: [PATCH 50/88] Revert "Verifier: Add check for DICompositeType elements being null" Asserts on various tests/buildbots, at least one example is DebugInfo/X86/set.ll This reverts commit 2dc5682dacab2dbb52a771746fdede0e938fc6e9. 
--- llvm/lib/IR/Verifier.cpp | 2 -- llvm/test/Verifier/dicompositetype-elements-null.ll | 6 ------ 2 files changed, 8 deletions(-) delete mode 100644 llvm/test/Verifier/dicompositetype-elements-null.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 00280dbe5300b..7b6f7b5aa6171 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1319,8 +1319,6 @@ void Verifier::visitDICompositeType(const DICompositeType &N) { unsigned DIBlockByRefStruct = 1 << 4; CheckDI((N.getFlags() & DIBlockByRefStruct) == 0, "DIBlockByRefStruct on DICompositeType is no longer supported", &N); - CheckDI(llvm::all_of(N.getElements(), [](const DINode *N) { return N; }), - "DISubprogram contains null entry in `elements` field", &N); if (N.isVector()) { const DINodeArray Elements = N.getElements(); diff --git a/llvm/test/Verifier/dicompositetype-elements-null.ll b/llvm/test/Verifier/dicompositetype-elements-null.ll deleted file mode 100644 index c0aca47852bac..0000000000000 --- a/llvm/test/Verifier/dicompositetype-elements-null.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: not opt -S < %s 2>&1 | FileCheck %s - -!named = !{!0} -; CHECK: DISubprogram contains null entry in `elements` field -!0 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", elements: !1) -!1 = !{null} From eddeb36cf1ced0e14e17ac90f60922366e382100 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Fri, 17 Jan 2025 14:46:14 -0500 Subject: [PATCH 51/88] [SPIRV] add pre legalization instruction combine (#122839) - Add the boilerplate to support instcombine in SPIRV - instcombine length(X-Y) to distance(X,Y) - switch HLSL's distance intrinsic to not special case for SPIRV. 
- fixes #122766 - This RFC we were requested to add in the infra for pattern matching: https://discourse.llvm.org/t/rfc-add-targetbuiltins-for-spirv-to-support-hlsl/83329/13 --- clang/include/clang/Basic/BuiltinsSPIRV.td | 6 + clang/lib/CodeGen/CGBuiltin.cpp | 10 + clang/lib/Headers/hlsl/hlsl_detail.h | 8 +- clang/lib/Sema/SemaSPIRV.cpp | 18 ++ clang/test/CodeGenHLSL/builtins/distance.hlsl | 30 ++- clang/test/CodeGenHLSL/builtins/length.hlsl | 95 +++++-- clang/test/CodeGenSPIRV/Builtins/length.c | 31 +++ clang/test/SemaSPIRV/BuiltIns/length-errors.c | 25 ++ llvm/lib/Target/SPIRV/CMakeLists.txt | 3 + llvm/lib/Target/SPIRV/SPIRV.h | 2 + llvm/lib/Target/SPIRV/SPIRV.td | 1 + llvm/lib/Target/SPIRV/SPIRVCombine.td | 22 ++ llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 8 + .../SPIRV/SPIRVPreLegalizerCombiner.cpp | 244 ++++++++++++++++++ llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 2 + ...relegalizercombiner-length-to-distance.mir | 26 ++ .../CodeGen/SPIRV/hlsl-intrinsics/distance.ll | 77 +++--- llvm/test/CodeGen/SPIRV/opencl/distance.ll | 11 + 18 files changed, 547 insertions(+), 72 deletions(-) create mode 100644 clang/test/CodeGenSPIRV/Builtins/length.c create mode 100644 clang/test/SemaSPIRV/BuiltIns/length-errors.c create mode 100644 llvm/lib/Target/SPIRV/SPIRVCombine.td create mode 100644 llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp create mode 100644 llvm/test/CodeGen/SPIRV/GlobalISel/InstCombine/prelegalizercombiner-length-to-distance.mir diff --git a/clang/include/clang/Basic/BuiltinsSPIRV.td b/clang/include/clang/Basic/BuiltinsSPIRV.td index 1e66939b822ef..f72c555921dfe 100644 --- a/clang/include/clang/Basic/BuiltinsSPIRV.td +++ b/clang/include/clang/Basic/BuiltinsSPIRV.td @@ -13,3 +13,9 @@ def SPIRVDistance : Builtin { let Attributes = [NoThrow, Const]; let Prototype = "void(...)"; } + +def SPIRVLength : Builtin { + let Spellings = ["__builtin_spirv_length"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} diff --git 
a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2385f2a320b62..b80833fd91884 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -20528,6 +20528,16 @@ Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID, /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_distance, ArrayRef{X, Y}, nullptr, "spv.distance"); } + case SPIRV::BI__builtin_spirv_length: { + Value *X = EmitScalarExpr(E->getArg(0)); + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + "length operand must have a float representation"); + assert(E->getArg(0)->getType()->isVectorType() && + "length operand must be a vector"); + return Builder.CreateIntrinsic( + /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_length, + ArrayRef{X}, nullptr, "spv.length"); + } } return nullptr; } diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index 3eb4a3dc861e3..b2c8cc6c5c3db 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -61,7 +61,11 @@ length_impl(T X) { template constexpr enable_if_t::value || is_same::value, T> length_vec_impl(vector X) { +#if (__has_builtin(__builtin_spirv_length)) + return __builtin_spirv_length(X); +#else return __builtin_elementwise_sqrt(__builtin_hlsl_dot(X, X)); +#endif } template @@ -73,11 +77,7 @@ distance_impl(T X, T Y) { template constexpr enable_if_t::value || is_same::value, T> distance_vec_impl(vector X, vector Y) { -#if (__has_builtin(__builtin_spirv_distance)) - return __builtin_spirv_distance(X, Y); -#else return length_vec_impl(X - Y); -#endif } } // namespace __detail } // namespace hlsl diff --git a/clang/lib/Sema/SemaSPIRV.cpp b/clang/lib/Sema/SemaSPIRV.cpp index d2de64826c6eb..dc49fc7907357 100644 --- a/clang/lib/Sema/SemaSPIRV.cpp +++ b/clang/lib/Sema/SemaSPIRV.cpp @@ -51,6 +51,24 @@ bool SemaSPIRV::CheckSPIRVBuiltinFunctionCall(unsigned BuiltinID, TheCall->setType(RetTy); break; } + case 
SPIRV::BI__builtin_spirv_length: { + if (SemaRef.checkArgCount(TheCall, 1)) + return true; + ExprResult A = TheCall->getArg(0); + QualType ArgTyA = A.get()->getType(); + auto *VTy = ArgTyA->getAs(); + if (VTy == nullptr) { + SemaRef.Diag(A.get()->getBeginLoc(), + diag::err_typecheck_convert_incompatible) + << ArgTyA + << SemaRef.Context.getVectorType(ArgTyA, 2, VectorKind::Generic) << 1 + << 0 << 0; + return true; + } + QualType RetTy = VTy->getElementType(); + TheCall->setType(RetTy); + break; + } } return false; } diff --git a/clang/test/CodeGenHLSL/builtins/distance.hlsl b/clang/test/CodeGenHLSL/builtins/distance.hlsl index 6952700a87f1d..e830903261c8c 100644 --- a/clang/test/CodeGenHLSL/builtins/distance.hlsl +++ b/clang/test/CodeGenHLSL/builtins/distance.hlsl @@ -33,8 +33,9 @@ half test_distance_half(half X, half Y) { return distance(X, Y); } // SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z19test_distance_half2Dv2_DhS_( // SPVCHECK-SAME: <2 x half> noundef nofpclass(nan inf) [[X:%.*]], <2 x half> noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { // SPVCHECK-NEXT: [[ENTRY:.*:]] -// SPVCHECK-NEXT: [[SPV_DISTANCE_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.spv.distance.v2f16(<2 x half> [[X]], <2 x half> [[Y]]) -// SPVCHECK-NEXT: ret half [[SPV_DISTANCE_I]] +// SPVCHECK-NEXT: [[SUB_I:%.*]] = fsub reassoc nnan ninf nsz arcp afn <2 x half> [[X]], [[Y]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.spv.length.v2f16(<2 x half> [[SUB_I]]) +// SPVCHECK-NEXT: ret half [[SPV_LENGTH_I]] // half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); } @@ -49,8 +50,9 @@ half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); } // SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z19test_distance_half3Dv3_DhS_( // SPVCHECK-SAME: <3 x half> noundef nofpclass(nan inf) [[X:%.*]], <3 x half> noundef nofpclass(nan inf) 
[[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { // SPVCHECK-NEXT: [[ENTRY:.*:]] -// SPVCHECK-NEXT: [[SPV_DISTANCE_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.spv.distance.v3f16(<3 x half> [[X]], <3 x half> [[Y]]) -// SPVCHECK-NEXT: ret half [[SPV_DISTANCE_I]] +// SPVCHECK-NEXT: [[SUB_I:%.*]] = fsub reassoc nnan ninf nsz arcp afn <3 x half> [[X]], [[Y]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.spv.length.v3f16(<3 x half> [[SUB_I]]) +// SPVCHECK-NEXT: ret half [[SPV_LENGTH_I]] // half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); } @@ -65,8 +67,9 @@ half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); } // SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z19test_distance_half4Dv4_DhS_( // SPVCHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[X:%.*]], <4 x half> noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { // SPVCHECK-NEXT: [[ENTRY:.*:]] -// SPVCHECK-NEXT: [[SPV_DISTANCE_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.spv.distance.v4f16(<4 x half> [[X]], <4 x half> [[Y]]) -// SPVCHECK-NEXT: ret half [[SPV_DISTANCE_I]] +// SPVCHECK-NEXT: [[SUB_I:%.*]] = fsub reassoc nnan ninf nsz arcp afn <4 x half> [[X]], [[Y]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.spv.length.v4f16(<4 x half> [[SUB_I]]) +// SPVCHECK-NEXT: ret half [[SPV_LENGTH_I]] // half test_distance_half4(half4 X, half4 Y) { return distance(X, Y); } @@ -97,8 +100,9 @@ float test_distance_float(float X, float Y) { return distance(X, Y); } // SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z20test_distance_float2Dv2_fS_( // SPVCHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[X:%.*]], <2 x float> noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { // SPVCHECK-NEXT: [[ENTRY:.*:]] -// SPVCHECK-NEXT: [[SPV_DISTANCE_I:%.*]] = tail call 
reassoc nnan ninf nsz arcp afn noundef float @llvm.spv.distance.v2f32(<2 x float> [[X]], <2 x float> [[Y]]) -// SPVCHECK-NEXT: ret float [[SPV_DISTANCE_I]] +// SPVCHECK-NEXT: [[SUB_I:%.*]] = fsub reassoc nnan ninf nsz arcp afn <2 x float> [[X]], [[Y]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.spv.length.v2f32(<2 x float> [[SUB_I]]) +// SPVCHECK-NEXT: ret float [[SPV_LENGTH_I]] // float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); } @@ -113,8 +117,9 @@ float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); } // SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z20test_distance_float3Dv3_fS_( // SPVCHECK-SAME: <3 x float> noundef nofpclass(nan inf) [[X:%.*]], <3 x float> noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { // SPVCHECK-NEXT: [[ENTRY:.*:]] -// SPVCHECK-NEXT: [[SPV_DISTANCE_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.spv.distance.v3f32(<3 x float> [[X]], <3 x float> [[Y]]) -// SPVCHECK-NEXT: ret float [[SPV_DISTANCE_I]] +// SPVCHECK-NEXT: [[SUB_I:%.*]] = fsub reassoc nnan ninf nsz arcp afn <3 x float> [[X]], [[Y]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.spv.length.v3f32(<3 x float> [[SUB_I]]) +// SPVCHECK-NEXT: ret float [[SPV_LENGTH_I]] // float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); } @@ -129,7 +134,8 @@ float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); } // SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z20test_distance_float4Dv4_fS_( // SPVCHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[X:%.*]], <4 x float> noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { // SPVCHECK-NEXT: [[ENTRY:.*:]] -// SPVCHECK-NEXT: [[SPV_DISTANCE_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.spv.distance.v4f32(<4 x float> [[X]], <4 x 
float> [[Y]]) -// SPVCHECK-NEXT: ret float [[SPV_DISTANCE_I]] +// SPVCHECK-NEXT: [[SUB_I:%.*]] = fsub reassoc nnan ninf nsz arcp afn <4 x float> [[X]], [[Y]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.spv.length.v4f32(<4 x float> [[SUB_I]]) +// SPVCHECK-NEXT: ret float [[SPV_LENGTH_I]] // float test_distance_float4(float4 X, float4 Y) { return distance(X, Y); } diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl index fcf3ee76ba5bb..2d4bbd995298f 100644 --- a/clang/test/CodeGenHLSL/builtins/length.hlsl +++ b/clang/test/CodeGenHLSL/builtins/length.hlsl @@ -1,114 +1,163 @@ -// RUN: %clang_cc1 -finclude-default-header -triple \ -// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,DXCHECK \ -// RUN: -DTARGET=dx +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -O1 -o - | FileCheck %s // RUN: %clang_cc1 -finclude-default-header -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ -// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,SPVCHECK \ -// RUN: -DTARGET=spv +// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefix=SPVCHECK -// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z16test_length_halfDh( // DXCHECK-LABEL: define noundef nofpclass(nan inf) half @_Z16test_length_halfDh( +// + +// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z16test_length_halfDh( // CHECK-SAME: half noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.fabs.f16(half [[P0]]) // CHECK-NEXT: ret half 
[[ELT_ABS_I]] // - +// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z16test_length_halfDh( +// SPVCHECK-SAME: half noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// SPVCHECK-NEXT: [[ENTRY:.*:]] +// SPVCHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.fabs.f16(half [[P0]]) +// SPVCHECK-NEXT: ret half [[ELT_ABS_I]] +// half test_length_half(half p0) { return length(p0); } -// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh( // DXCHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh( +// + + +// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh( // CHECK-SAME: <2 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].fdot.v2f16(<2 x half> [[P0]], <2 x half> [[P0]]) +// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.dx.fdot.v2f16(<2 x half> [[P0]], <2 x half> [[P0]]) // CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[HLSL_DOT_I]]) // CHECK-NEXT: ret half [[TMP0]] // - - +// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh( +// SPVCHECK-SAME: <2 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPVCHECK-NEXT: [[ENTRY:.*:]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.spv.length.v2f16(<2 x half> [[P0]]) +// SPVCHECK-NEXT: ret half [[SPV_LENGTH_I]] +// half test_length_half2(half2 p0) { return length(p0); } -// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh( // DXCHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh( +// 
CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh( // CHECK-SAME: <3 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].fdot.v3f16(<3 x half> [[P0]], <3 x half> [[P0]]) +// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.dx.fdot.v3f16(<3 x half> [[P0]], <3 x half> [[P0]]) // CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[HLSL_DOT_I]]) // CHECK-NEXT: ret half [[TMP0]] // +// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh( +// SPVCHECK-SAME: <3 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPVCHECK-NEXT: [[ENTRY:.*:]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.spv.length.v3f16(<3 x half> [[P0]]) +// SPVCHECK-NEXT: ret half [[SPV_LENGTH_I]] +// half test_length_half3(half3 p0) { return length(p0); } -// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh( // DXCHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh( +// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh( // CHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].fdot.v4f16(<4 x half> [[P0]], <4 x half> [[P0]]) +// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.dx.fdot.v4f16(<4 x half> [[P0]], <4 x half> [[P0]]) // CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[HLSL_DOT_I]]) // CHECK-NEXT: ret half [[TMP0]] // +// SPVCHECK-LABEL: 
define spir_func noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh( +// SPVCHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPVCHECK-NEXT: [[ENTRY:.*:]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.spv.length.v4f16(<4 x half> [[P0]]) +// SPVCHECK-NEXT: ret half [[SPV_LENGTH_I]] +// half test_length_half4(half4 p0) { return length(p0); } -// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z17test_length_floatf( // DXCHECK-LABEL: define noundef nofpclass(nan inf) float @_Z17test_length_floatf( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z17test_length_floatf( // CHECK-SAME: float noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.fabs.f32(float [[P0]]) // CHECK-NEXT: ret float [[ELT_ABS_I]] // +// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z17test_length_floatf( +// SPVCHECK-SAME: float noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPVCHECK-NEXT: [[ENTRY:.*:]] +// SPVCHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.fabs.f32(float [[P0]]) +// SPVCHECK-NEXT: ret float [[ELT_ABS_I]] +// float test_length_float(float p0) { return length(p0); } -// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f( // DXCHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f( // CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].fdot.v2f32(<2 x float> [[P0]], <2 x float> 
[[P0]]) +// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn float @llvm.dx.fdot.v2f32(<2 x float> [[P0]], <2 x float> [[P0]]) // CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[HLSL_DOT_I]]) // CHECK-NEXT: ret float [[TMP0]] // +// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f( +// SPVCHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPVCHECK-NEXT: [[ENTRY:.*:]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.spv.length.v2f32(<2 x float> [[P0]]) +// SPVCHECK-NEXT: ret float [[SPV_LENGTH_I]] +// float test_length_float2(float2 p0) { return length(p0); } -// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f( // DXCHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f( // CHECK-SAME: <3 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].fdot.v3f32(<3 x float> [[P0]], <3 x float> [[P0]]) +// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn float @llvm.dx.fdot.v3f32(<3 x float> [[P0]], <3 x float> [[P0]]) // CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[HLSL_DOT_I]]) // CHECK-NEXT: ret float [[TMP0]] // +// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f( +// SPVCHECK-SAME: <3 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPVCHECK-NEXT: [[ENTRY:.*:]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float 
@llvm.spv.length.v3f32(<3 x float> [[P0]]) +// SPVCHECK-NEXT: ret float [[SPV_LENGTH_I]] +// float test_length_float3(float3 p0) { return length(p0); } -// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f( // DXCHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f( +// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f( // CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].fdot.v4f32(<4 x float> [[P0]], <4 x float> [[P0]]) +// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn float @llvm.dx.fdot.v4f32(<4 x float> [[P0]], <4 x float> [[P0]]) // CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[HLSL_DOT_I]]) // CHECK-NEXT: ret float [[TMP0]] // +// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f( +// SPVCHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPVCHECK-NEXT: [[ENTRY:.*:]] +// SPVCHECK-NEXT: [[SPV_LENGTH_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.spv.length.v4f32(<4 x float> [[P0]]) +// SPVCHECK-NEXT: ret float [[SPV_LENGTH_I]] +// float test_length_float4(float4 p0) { return length(p0); diff --git a/clang/test/CodeGenSPIRV/Builtins/length.c b/clang/test/CodeGenSPIRV/Builtins/length.c new file mode 100644 index 0000000000000..59e7c298dd816 --- /dev/null +++ b/clang/test/CodeGenSPIRV/Builtins/length.c @@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 + +// RUN: %clang_cc1 -O1 -triple spirv-pc-vulkan-compute %s -emit-llvm -o - | FileCheck %s + +typedef float float2 __attribute__((ext_vector_type(2))); +typedef float 
float3 __attribute__((ext_vector_type(3))); +typedef float float4 __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: define spir_func float @test_length_float2( +// CHECK-SAME: <2 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SPV_LENGTH:%.*]] = tail call float @llvm.spv.length.v2f32(<2 x float> [[X]]) +// CHECK-NEXT: ret float [[SPV_LENGTH]] +// +float test_length_float2(float2 X) { return __builtin_spirv_length(X); } + +// CHECK-LABEL: define spir_func float @test_length_float3( +// CHECK-SAME: <3 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SPV_LENGTH:%.*]] = tail call float @llvm.spv.length.v3f32(<3 x float> [[X]]) +// CHECK-NEXT: ret float [[SPV_LENGTH]] +// +float test_length_float3(float3 X) { return __builtin_spirv_length(X); } + +// CHECK-LABEL: define spir_func float @test_length_float4( +// CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SPV_LENGTH:%.*]] = tail call float @llvm.spv.length.v4f32(<4 x float> [[X]]) +// CHECK-NEXT: ret float [[SPV_LENGTH]] +// +float test_length_float4(float4 X) { return __builtin_spirv_length(X); } diff --git a/clang/test/SemaSPIRV/BuiltIns/length-errors.c b/clang/test/SemaSPIRV/BuiltIns/length-errors.c new file mode 100644 index 0000000000000..3244bd6737f11 --- /dev/null +++ b/clang/test/SemaSPIRV/BuiltIns/length-errors.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 %s -triple spirv-pc-vulkan-compute -verify + +typedef float float2 __attribute__((ext_vector_type(2))); + +void test_too_few_arg() +{ + return __builtin_spirv_length(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +void test_too_many_arg(float2 p0) +{ + return __builtin_spirv_length(p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +float test_double_scalar_inputs(double 
p0) { + return __builtin_spirv_length(p0); + // expected-error@-1 {{passing 'double' to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(double)))) double' (vector of 2 'double' values)}} +} + +float test_int_scalar_inputs(int p0) { + return __builtin_spirv_length(p0); + // expected-error@-1 {{passing 'int' to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(int)))) int' (vector of 2 'int' values)}} +} diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt index a79e19fcd753d..efdd8c8d24fbd 100644 --- a/llvm/lib/Target/SPIRV/CMakeLists.txt +++ b/llvm/lib/Target/SPIRV/CMakeLists.txt @@ -10,6 +10,8 @@ tablegen(LLVM SPIRVGenRegisterBank.inc -gen-register-bank) tablegen(LLVM SPIRVGenRegisterInfo.inc -gen-register-info) tablegen(LLVM SPIRVGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM SPIRVGenTables.inc -gen-searchable-tables) +tablegen(LLVM SPIRVGenPreLegalizeGICombiner.inc -gen-global-isel-combiner + -combiners="SPIRVPreLegalizerCombiner") add_public_tablegen_target(SPIRVCommonTableGen) @@ -33,6 +35,7 @@ add_llvm_target(SPIRVCodeGen SPIRVModuleAnalysis.cpp SPIRVStructurizer.cpp SPIRVPreLegalizer.cpp + SPIRVPreLegalizerCombiner.cpp SPIRVPostLegalizer.cpp SPIRVPrepareFunctions.cpp SPIRVRegisterBankInfo.cpp diff --git a/llvm/lib/Target/SPIRV/SPIRV.h b/llvm/lib/Target/SPIRV/SPIRV.h index 81b5720264425..6d00a046ff7ca 100644 --- a/llvm/lib/Target/SPIRV/SPIRV.h +++ b/llvm/lib/Target/SPIRV/SPIRV.h @@ -24,6 +24,7 @@ FunctionPass *createSPIRVStructurizerPass(); FunctionPass *createSPIRVMergeRegionExitTargetsPass(); FunctionPass *createSPIRVStripConvergenceIntrinsicsPass(); FunctionPass *createSPIRVRegularizerPass(); +FunctionPass *createSPIRVPreLegalizerCombiner(); FunctionPass *createSPIRVPreLegalizerPass(); FunctionPass *createSPIRVPostLegalizerPass(); ModulePass *createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM); @@ -36,6 +37,7 @@ createSPIRVInstructionSelector(const 
SPIRVTargetMachine &TM, void initializeSPIRVModuleAnalysisPass(PassRegistry &); void initializeSPIRVConvergenceRegionAnalysisWrapperPassPass(PassRegistry &); void initializeSPIRVPreLegalizerPass(PassRegistry &); +void initializeSPIRVPreLegalizerCombinerPass(PassRegistry &); void initializeSPIRVPostLegalizerPass(PassRegistry &); void initializeSPIRVStructurizerPass(PassRegistry &); void initializeSPIRVEmitIntrinsicsPass(PassRegistry &); diff --git a/llvm/lib/Target/SPIRV/SPIRV.td b/llvm/lib/Target/SPIRV/SPIRV.td index 108c7e6d3861f..39a4131c7f1bd 100644 --- a/llvm/lib/Target/SPIRV/SPIRV.td +++ b/llvm/lib/Target/SPIRV/SPIRV.td @@ -11,6 +11,7 @@ include "llvm/Target/Target.td" include "SPIRVRegisterInfo.td" include "SPIRVRegisterBanks.td" include "SPIRVInstrInfo.td" +include "SPIRVCombine.td" include "SPIRVBuiltins.td" def SPIRVInstrInfo : InstrInfo; diff --git a/llvm/lib/Target/SPIRV/SPIRVCombine.td b/llvm/lib/Target/SPIRV/SPIRVCombine.td new file mode 100644 index 0000000000000..6f726e024de52 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVCombine.td @@ -0,0 +1,22 @@ +//=- SPIRVCombine.td - Define SPIRV Combine Rules -------------*-tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// + +include "llvm/Target/GlobalISel/Combine.td" + + +def vector_length_sub_to_distance_lowering : GICombineRule < + (defs root:$root), + (match (wip_match_opcode G_INTRINSIC):$root, + [{ return matchLengthToDistance(*${root}, MRI); }]), + (apply [{ applySPIRVDistance(*${root}, MRI, B); }]) +>; + +def SPIRVPreLegalizerCombiner + : GICombiner<"SPIRVPreLegalizerCombinerImpl", + [vector_length_sub_to_distance_lowering]> { + let CombineAllMethodName = "tryCombineAllImpl"; +} diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 5b4c84918ab48..b5ef8d2a9286f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -17,6 +17,8 @@ #include "SPIRVUtils.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -35,9 +37,15 @@ class SPIRVPreLegalizer : public MachineFunctionPass { initializeSPIRVPreLegalizerPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; }; } // namespace +void SPIRVPreLegalizer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + static void addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR, const SPIRVSubtarget &STI, diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp new file mode 100644 index 0000000000000..269524b2410c2 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp @@ -0,0 +1,244 @@ + +//===-- SPIRVPreLegalizerCombiner.cpp - combine legalization ----*- C++ -*-===// +// +// 
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does combining of machine instructions at the generic MI level, +// before the legalizer. +// +//===----------------------------------------------------------------------===// + +#include "SPIRV.h" +#include "SPIRVTargetMachine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" +#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/Support/Debug.h" + +#define GET_GICOMBINER_DEPS +#include "SPIRVGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_DEPS + +#define DEBUG_TYPE "spirv-prelegalizer-combiner" + +using namespace llvm; +using namespace MIPatternMatch; + +namespace { + +#define GET_GICOMBINER_TYPES +#include "SPIRVGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_TYPES + +/// This match is part of a combine that +/// rewrites length(X - Y) to distance(X, Y) +/// (f32 (g_intrinsic length +/// (g_fsub (vXf32 X) (vXf32 Y)))) +/// -> 
+/// (f32 (g_intrinsic distance +/// (vXf32 X) (vXf32 Y))) +/// +bool matchLengthToDistance(MachineInstr &MI, MachineRegisterInfo &MRI) { + if (MI.getOpcode() != TargetOpcode::G_INTRINSIC || + cast(MI).getIntrinsicID() != Intrinsic::spv_length) + return false; + + // First operand of MI is `G_INTRINSIC` so start at operand 2. + Register SubReg = MI.getOperand(2).getReg(); + MachineInstr *SubInstr = MRI.getVRegDef(SubReg); + if (!SubInstr || SubInstr->getOpcode() != TargetOpcode::G_FSUB) + return false; + + return true; +} +void applySPIRVDistance(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) { + + // Extract the operands for X and Y from the match criteria. + Register SubDestReg = MI.getOperand(2).getReg(); + MachineInstr *SubInstr = MRI.getVRegDef(SubDestReg); + Register SubOperand1 = SubInstr->getOperand(1).getReg(); + Register SubOperand2 = SubInstr->getOperand(2).getReg(); + + // Remove the original `spv_length` instruction. + + Register ResultReg = MI.getOperand(0).getReg(); + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock &MBB = *MI.getParent(); + MachineBasicBlock::iterator InsertPt = MI.getIterator(); + + // Build the `spv_distance` intrinsic. + MachineInstrBuilder NewInstr = + BuildMI(MBB, InsertPt, DL, B.getTII().get(TargetOpcode::G_INTRINSIC)); + NewInstr + .addDef(ResultReg) // Result register + .addIntrinsicID(Intrinsic::spv_distance) // Intrinsic ID + .addUse(SubOperand1) // Operand X + .addUse(SubOperand2); // Operand Y + + auto RemoveAllUses = [&](Register Reg) { + SmallVector UsesToErase; + for (auto &UseMI : MRI.use_instructions(Reg)) + UsesToErase.push_back(&UseMI); + + // calling eraseFromParent to early invalidates the iterator. 
+ for (auto *MIToErase : UsesToErase) + MIToErase->eraseFromParent(); + }; + RemoveAllUses(SubDestReg); // remove all uses of FSUB Result + SubInstr->eraseFromParent(); // remove FSUB instruction +} + +class SPIRVPreLegalizerCombinerImpl : public Combiner { +protected: + const CombinerHelper Helper; + const SPIRVPreLegalizerCombinerImplRuleConfig &RuleConfig; + const SPIRVSubtarget &STI; + +public: + SPIRVPreLegalizerCombinerImpl( + MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, + GISelKnownBits &KB, GISelCSEInfo *CSEInfo, + const SPIRVPreLegalizerCombinerImplRuleConfig &RuleConfig, + const SPIRVSubtarget &STI, MachineDominatorTree *MDT, + const LegalizerInfo *LI); + + static const char *getName() { return "SPIRVPreLegalizerCombiner"; } + + bool tryCombineAll(MachineInstr &I) const override; + + bool tryCombineAllImpl(MachineInstr &I) const; + +private: +#define GET_GICOMBINER_CLASS_MEMBERS +#include "SPIRVGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_CLASS_MEMBERS +}; + +#define GET_GICOMBINER_IMPL +#include "SPIRVGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_IMPL + +SPIRVPreLegalizerCombinerImpl::SPIRVPreLegalizerCombinerImpl( + MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, + GISelKnownBits &KB, GISelCSEInfo *CSEInfo, + const SPIRVPreLegalizerCombinerImplRuleConfig &RuleConfig, + const SPIRVSubtarget &STI, MachineDominatorTree *MDT, + const LegalizerInfo *LI) + : Combiner(MF, CInfo, TPC, &KB, CSEInfo), + Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI), + RuleConfig(RuleConfig), STI(STI), +#define GET_GICOMBINER_CONSTRUCTOR_INITS +#include "SPIRVGenPreLegalizeGICombiner.inc" +#undef GET_GICOMBINER_CONSTRUCTOR_INITS +{ +} + +bool SPIRVPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const { + return tryCombineAllImpl(MI); +} + +// Pass boilerplate +// ================ + +class SPIRVPreLegalizerCombiner : public MachineFunctionPass { +public: + static char ID; + + 
SPIRVPreLegalizerCombiner(); + + StringRef getPassName() const override { return "SPIRVPreLegalizerCombiner"; } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + SPIRVPreLegalizerCombinerImplRuleConfig RuleConfig; +}; + +} // end anonymous namespace + +void SPIRVPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +SPIRVPreLegalizerCombiner::SPIRVPreLegalizerCombiner() + : MachineFunctionPass(ID) { + initializeSPIRVPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); + + if (!RuleConfig.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); +} + +bool SPIRVPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + auto &TPC = getAnalysis(); + + const SPIRVSubtarget &ST = MF.getSubtarget(); + const auto *LI = ST.getLegalizerInfo(); + + const Function &F = MF.getFunction(); + bool EnableOpt = + MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F); + GISelKnownBits *KB = &getAnalysis().get(MF); + MachineDominatorTree *MDT = + &getAnalysis().getDomTree(); + CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(), + F.hasMinSize()); + // Disable fixed-point iteration to reduce compile-time + CInfo.MaxIterations = 1; + CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass; + // This is the first Combiner, so the input IR might contain dead + // instructions. 
+ CInfo.EnableFullDCE = false; + SPIRVPreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB, /*CSEInfo*/ nullptr, + RuleConfig, ST, MDT, LI); + return Impl.combineMachineInstrs(); +} + +char SPIRVPreLegalizerCombiner::ID = 0; +INITIALIZE_PASS_BEGIN(SPIRVPreLegalizerCombiner, DEBUG_TYPE, + "Combine SPIRV machine instrs before legalization", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) +INITIALIZE_PASS_END(SPIRVPreLegalizerCombiner, DEBUG_TYPE, + "Combine SPIRV machine instrs before legalization", false, + false) + +namespace llvm { +FunctionPass *createSPIRVPreLegalizerCombiner() { + return new SPIRVPreLegalizerCombiner(); +} +} // end namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index dca67cb6c632b..098c7a6fba50e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -48,6 +48,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTarget() { initializeSPIRVModuleAnalysisPass(PR); initializeSPIRVConvergenceRegionAnalysisWrapperPassPass(PR); initializeSPIRVStructurizerPass(PR); + initializeSPIRVPreLegalizerCombinerPass(PR); } static std::string computeDataLayout(const Triple &TT) { @@ -217,6 +218,7 @@ bool SPIRVPassConfig::addIRTranslator() { } void SPIRVPassConfig::addPreLegalizeMachineIR() { + addPass(createSPIRVPreLegalizerCombiner()); addPass(createSPIRVPreLegalizerPass()); } diff --git a/llvm/test/CodeGen/SPIRV/GlobalISel/InstCombine/prelegalizercombiner-length-to-distance.mir b/llvm/test/CodeGen/SPIRV/GlobalISel/InstCombine/prelegalizercombiner-length-to-distance.mir new file mode 100644 index 0000000000000..219b98ecca6f0 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/GlobalISel/InstCombine/prelegalizercombiner-length-to-distance.mir @@ -0,0 +1,26 @@ +# RUN: llc -verify-machineinstrs -O0 -mtriple spirv-unknown-unknown -run-pass=spirv-prelegalizer-combiner %s 
-o - | FileCheck %s +# REQUIRES: asserts +--- +name: distance_instcombine_float4 +tracksRegLiveness: true +legalized: true +body: | + bb.1.entry: + ; CHECK-LABEL: name: distance_instcombine_float4 + ; CHECK-NOT: %6:_(<4 x s32>) = G_FSUB %2, %3 + ; CHECK-NOT: %7:id(s32) = G_INTRINSIC intrinsic(@llvm.spv.length), %6(<4 x s32>) + ; CHECK: %7:id(s32) = G_INTRINSIC intrinsic(@llvm.spv.distance), %2(<4 x s32>), %3(<4 x s32>) + %0:type(s64) = OpTypeFloat 32 + %1:type(s64) = OpTypeVector %0(s64), 4 + OpName %2(<4 x s32>), 97 + OpName %3(<4 x s32>), 98 + %4:type(s64) = OpTypeFunction %0(s64), %1(s64), %1(s64) + %5:iid(s64) = OpFunction %0(s64), 0, %4(s64) + %2:vfid(<4 x s32>) = OpFunctionParameter %1(s64) + %3:vfid(<4 x s32>) = OpFunctionParameter %1(s64) + OpName %5(s64), 1953720676, 1701015137, 1936615775, 1836016500, 1701734754, 1869375071, 3437665 + OpDecorate %5(s64), 41, 1953720676, 1701015137, 1936615775, 1836016500, 1701734754, 1869375071, 3437665, 0 + %6:_(<4 x s32>) = G_FSUB %2, %3 + %7:id(s32) = G_INTRINSIC intrinsic(@llvm.spv.length), %6(<4 x s32>) + OpReturnValue %7(s32) + \ No newline at end of file diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll index 85a24a0127ae0..cb92f775eef31 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/distance.ll @@ -1,33 +1,44 @@ -; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} - -; Make sure SPIRV operation function calls for distance are lowered correctly. 
- -; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" -; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 -; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 -; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 -; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 - -define noundef half @distance_half4(<4 x half> noundef %a, <4 x half> noundef %b) { -entry: - ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] - ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] - ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]] - ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Distance %[[#arg0]] %[[#arg1]] - %spv.distance = call half @llvm.spv.distance.f16(<4 x half> %a, <4 x half> %b) - ret half %spv.distance -} - -define noundef float @distance_float4(<4 x float> noundef %a, <4 x float> noundef %b) { -entry: - ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] - ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]] - ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]] - ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Distance %[[#arg0]] %[[#arg1]] - %spv.distance = call float @llvm.spv.distance.f32(<4 x float> %a, <4 x float> %b) - ret float %spv.distance -} - -declare half @llvm.spv.distance.f16(<4 x half>, <4 x half>) -declare float @llvm.spv.distance.f32(<4 x float>, <4 x float>) +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure SPIRV operation function calls for distance are lowered correctly. 
+ +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 + +define noundef half @distance_half4(<4 x half> noundef %a, <4 x half> noundef %b) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Distance %[[#arg0]] %[[#arg1]] + %spv.distance = call half @llvm.spv.distance.f16(<4 x half> %a, <4 x half> %b) + ret half %spv.distance +} + +define noundef float @distance_float4(<4 x float> noundef %a, <4 x float> noundef %b) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Distance %[[#arg0]] %[[#arg1]] + %spv.distance = call float @llvm.spv.distance.f32(<4 x float> %a, <4 x float> %b) + ret float %spv.distance +} + +define noundef float @distance_instcombine_float4(<4 x float> noundef %a, <4 x float> noundef %b) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Distance %[[#arg0]] %[[#arg1]] + %delta = fsub <4 x float> %a, %b + %spv.length = call float @llvm.spv.length.f32(<4 x float> %delta) + ret float %spv.length +} + +declare half @llvm.spv.distance.f16(<4 x half>, <4 x half>) +declare float @llvm.spv.distance.f32(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/SPIRV/opencl/distance.ll 
b/llvm/test/CodeGen/SPIRV/opencl/distance.ll index ac18804c00c9a..ed329175e9c07 100644 --- a/llvm/test/CodeGen/SPIRV/opencl/distance.ll +++ b/llvm/test/CodeGen/SPIRV/opencl/distance.ll @@ -30,5 +30,16 @@ entry: ret float %spv.distance } +define noundef float @distance_instcombine_float4(<4 x float> noundef %a, <4 x float> noundef %b) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_cl]] distance %[[#arg0]] %[[#arg1]] + %delta = fsub <4 x float> %a, %b + %spv.length = call float @llvm.spv.length.f32(<4 x float> %delta) + ret float %spv.length +} + declare half @llvm.spv.distance.f16(<4 x half>, <4 x half>) declare float @llvm.spv.distance.f32(<4 x float>, <4 x float>) From fec503d1a3f48e600d0a8e108757dedba909f40c Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 17 Jan 2025 11:43:29 -0800 Subject: [PATCH 52/88] [SLP][NFC]Add safe createExtractVector and use instead Builder.CreateExtractVector --- .../Transforms/Vectorize/SLPVectorizer.cpp | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b0b8f8249d657..0bf01a8c680bf 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4995,6 +4995,23 @@ static Value *createInsertVector( return Vec; } +/// Correctly creates extract_subvector, checking that the index is a multiple +/// of the subvector's length. Otherwise, generates a shuffle that selects the +/// \p SubVecVF consecutive elements starting at \p Index. 
+static Value *createExtractVector(IRBuilderBase &Builder, Value *Vec, + unsigned SubVecVF, unsigned Index) { + if (Index % SubVecVF == 0) { + VectorType *SubVecTy = + getWidenedType(Vec->getType()->getScalarType(), SubVecVF); + return Builder.CreateExtractVector(SubVecTy, Vec, Builder.getInt64(Index)); + } + // Create shuffle, extract_subvector requires that index is multiple of + // the subvector length. + SmallVector Mask(SubVecVF, PoisonMaskElem); + std::iota(Mask.begin(), Mask.end(), Index); + return Builder.CreateShuffleVector(Vec, Mask); +} + BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(ArrayRef VL, const Value *VL0, SmallVectorImpl &Order, @@ -16550,10 +16567,8 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues, // When REVEC is enabled, we need to extract a vector. // Note: The element size of Scalar may be different from the // element size of Vec. - Ex = Builder.CreateExtractVector( - FixedVectorType::get(Vec->getType()->getScalarType(), - VecTyNumElements), - Vec, Builder.getInt64(ExternalUse.Lane * VecTyNumElements)); + Ex = createExtractVector(Builder, Vec, VecTyNumElements, + ExternalUse.Lane * VecTyNumElements); } else { Ex = Builder.CreateExtractElement(Vec, Lane); } From 10fdd09c3bda8bfc532cecf4f11babaf356554f3 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 17 Jan 2025 19:51:14 +0000 Subject: [PATCH 53/88] [clang][DebugInfo] Emit DW_AT_object_pointer on function declarations with explicit `this` (#122928) In https://github.com/llvm/llvm-project/pull/122897 we started attaching `DW_AT_object_pointer` to function definitions. This patch does the same but for function declarations (which we do for implicit object pointers already). 
Fixes https://github.com/llvm/llvm-project/issues/120974 --- clang/lib/CodeGen/CGDebugInfo.cpp | 21 +++++++++++++------ .../CodeGenCXX/debug-info-object-pointer.cpp | 7 +++---- llvm/include/llvm-c/DebugInfo.h | 11 ++++++---- llvm/include/llvm/IR/DIBuilder.h | 6 +++--- llvm/lib/IR/DIBuilder.cpp | 8 +++++-- llvm/lib/IR/DebugInfo.cpp | 9 ++++---- 6 files changed, 39 insertions(+), 23 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index f88f56c98186d..6cbcaf0384410 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2016,13 +2016,15 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( // First element is always return type. For 'void' functions it is NULL. Elts.push_back(Args[0]); - // "this" pointer is always first argument. - // ThisPtr may be null if the member function has an explicit 'this' - // parameter. - if (!ThisPtr.isNull()) { + const bool HasExplicitObjectParameter = ThisPtr.isNull(); + + // "this" pointer is always first argument. For explicit "this" + // parameters, it will already be in Args[1]. + if (!HasExplicitObjectParameter) { llvm::DIType *ThisPtrType = getOrCreateType(ThisPtr, Unit); TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType); - ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType); + ThisPtrType = + DBuilder.createObjectPointerType(ThisPtrType, /*Implicit=*/true); Elts.push_back(ThisPtrType); } @@ -2030,6 +2032,13 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( for (unsigned i = 1, e = Args.size(); i != e; ++i) Elts.push_back(Args[i]); + // Attach FlagObjectPointer to the explicit "this" parameter. 
+ if (HasExplicitObjectParameter) { + assert(Elts.size() >= 2 && Args.size() >= 2 && + "Expected at least return type and object parameter."); + Elts[1] = DBuilder.createObjectPointerType(Args[1], /*Implicit=*/false); + } + llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts); return DBuilder.createSubroutineType(EltTypeArray, OriginalFunc->getFlags(), @@ -5118,7 +5127,7 @@ llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, llvm::DIType *CachedTy = getTypeOrNull(QualTy); if (CachedTy) Ty = CachedTy; - return DBuilder.createObjectPointerType(Ty); + return DBuilder.createObjectPointerType(Ty, /*Implicit=*/true); } void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( diff --git a/clang/test/CodeGenCXX/debug-info-object-pointer.cpp b/clang/test/CodeGenCXX/debug-info-object-pointer.cpp index 594d4da791ee8..49079f5990996 100644 --- a/clang/test/CodeGenCXX/debug-info-object-pointer.cpp +++ b/clang/test/CodeGenCXX/debug-info-object-pointer.cpp @@ -5,12 +5,11 @@ // CHECK: !DIDerivedType(tag: DW_TAG_pointer_type // CHECK-SAME: flags: DIFlagArtificial | DIFlagObjectPointer // -// // FIXME: DIFlagObjectPointer not attached to the explicit object -// // argument in the subprogram declaration. // CHECK: !DISubprogram(name: "explicit_this", // flags: DIFlagPrototyped -// CHECK-NOT: DIFlagObjectPointer -// CHECK-NOT: DIFlagArtificial +// +// CHECK: !DIDerivedType(tag: DW_TAG_rvalue_reference_type +// CHECK-SAME: flags: DIFlagObjectPointer) // // CHECK: !DILocalVariable(name: "this", arg: 1 // CHECK-SAME: flags: DIFlagArtificial | DIFlagObjectPointer diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index 07f87d44088e7..ac7ee5a7cc9a1 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -870,13 +870,16 @@ LLVMDIBuilderCreateObjCProperty(LLVMDIBuilderRef Builder, LLVMMetadataRef Ty); /** - * Create a uniqued DIType* clone with FlagObjectPointer and FlagArtificial set. 
+ * Create a uniqued DIType* clone with FlagObjectPointer. If \c Implicit + * is true, then also set FlagArtificial. * \param Builder The DIBuilder. * \param Type The underlying type to which this pointer points. + * \param Implicit Indicates whether this pointer was implicitly generated + * (i.e., not spelled out in source). */ -LLVMMetadataRef -LLVMDIBuilderCreateObjectPointerType(LLVMDIBuilderRef Builder, - LLVMMetadataRef Type); +LLVMMetadataRef LLVMDIBuilderCreateObjectPointerType(LLVMDIBuilderRef Builder, + LLVMMetadataRef Type, + LLVMBool Implicit); /** * Create debugging information entry for a qualified diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index cb1150c269a1d..6c479415b9ed2 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -662,9 +662,9 @@ namespace llvm { /// Create a uniqued clone of \p Ty with FlagArtificial set. static DIType *createArtificialType(DIType *Ty); - /// Create a uniqued clone of \p Ty with FlagObjectPointer and - /// FlagArtificial set. - static DIType *createObjectPointerType(DIType *Ty); + /// Create a uniqued clone of \p Ty with FlagObjectPointer set. + /// If \p Implicit is true, also set FlagArtificial. + static DIType *createObjectPointerType(DIType *Ty, bool Implicit); /// Create a permanent forward-declared type. DICompositeType *createForwardDecl(unsigned Tag, StringRef Name, diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index b240a2a39de36..d9bd4f11e89a3 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -644,11 +644,15 @@ DIType *DIBuilder::createArtificialType(DIType *Ty) { return createTypeWithFlags(Ty, DINode::FlagArtificial); } -DIType *DIBuilder::createObjectPointerType(DIType *Ty) { +DIType *DIBuilder::createObjectPointerType(DIType *Ty, bool Implicit) { // FIXME: Restrict this to the nodes where it's valid. 
if (Ty->isObjectPointer()) return Ty; - DINode::DIFlags Flags = DINode::FlagObjectPointer | DINode::FlagArtificial; + DINode::DIFlags Flags = DINode::FlagObjectPointer; + + if (Implicit) + Flags |= DINode::FlagArtificial; + return createTypeWithFlags(Ty, Flags); } diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index e5b45e0082a82..4ce518009bd3e 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -1432,10 +1432,11 @@ LLVMDIBuilderCreateObjCProperty(LLVMDIBuilderRef Builder, PropertyAttributes, unwrapDI(Ty))); } -LLVMMetadataRef -LLVMDIBuilderCreateObjectPointerType(LLVMDIBuilderRef Builder, - LLVMMetadataRef Type) { - return wrap(unwrap(Builder)->createObjectPointerType(unwrapDI(Type))); +LLVMMetadataRef LLVMDIBuilderCreateObjectPointerType(LLVMDIBuilderRef Builder, + LLVMMetadataRef Type, + LLVMBool Implicit) { + return wrap(unwrap(Builder)->createObjectPointerType(unwrapDI(Type), + Implicit)); } LLVMMetadataRef From 128e2e446e90c3b1827cfc7d4d19e3c0976beff3 Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Thu, 16 Jan 2025 18:08:53 -0800 Subject: [PATCH 54/88] [SandboxVec][VecUtils][NFC] Move functions to VecUtils.cpp and add a VecUtils::dump() --- .../Vectorize/SandboxVectorizer/VecUtils.h | 16 ++++------ llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 + .../Vectorize/SandboxVectorizer/VecUtils.cpp | 32 +++++++++++++++++++ 3 files changed, 40 insertions(+), 9 deletions(-) create mode 100644 llvm/lib/Transforms/Vectorize/SandboxVectorizer/VecUtils.cpp diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h index 28fa33656dd5f..6cbbb396ea823 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h @@ -134,15 +134,13 @@ class VecUtils { return ScalarTy; } /// \Returns the first integer power of 2 that is <= Num. 
- static unsigned getFloorPowerOf2(unsigned Num) { - if (Num == 0) - return Num; - unsigned Mask = Num; - Mask >>= 1; - for (unsigned ShiftBy = 1; ShiftBy < sizeof(Num) * 8; ShiftBy <<= 1) - Mask |= Mask >> ShiftBy; - return Num & ~Mask; - } + static unsigned getFloorPowerOf2(unsigned Num); + +#ifndef NDEBUG + /// Helper dump function for debugging. + LLVM_DUMP_METHOD static void dump(ArrayRef Bndl); + LLVM_DUMP_METHOD static void dump(ArrayRef Bndl); +#endif // NDEBUG }; } // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 6a025652f92f8..e5fabd318b82c 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_component_library(LLVMVectorize SandboxVectorizer/SandboxVectorizerPassBuilder.cpp SandboxVectorizer/Scheduler.cpp SandboxVectorizer/SeedCollector.cpp + SandboxVectorizer/VecUtils.cpp SLPVectorizer.cpp Vectorize.cpp VectorCombine.cpp diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/VecUtils.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/VecUtils.cpp new file mode 100644 index 0000000000000..6f9ef07e467d2 --- /dev/null +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/VecUtils.cpp @@ -0,0 +1,32 @@ +//===- VecUtils.cpp -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" + +namespace llvm::sandboxir { + +unsigned VecUtils::getFloorPowerOf2(unsigned Num) { + if (Num == 0) + return Num; + unsigned Mask = Num; + Mask >>= 1; + for (unsigned ShiftBy = 1; ShiftBy < sizeof(Num) * 8; ShiftBy <<= 1) + Mask |= Mask >> ShiftBy; + return Num & ~Mask; +} + +#ifndef NDEBUG +template static void dumpImpl(ArrayRef Bndl) { + for (auto [Idx, V] : enumerate(Bndl)) + dbgs() << Idx << "." << *V << "\n"; +} +void VecUtils::dump(ArrayRef Bndl) { dumpImpl(Bndl); } +void VecUtils::dump(ArrayRef Bndl) { dumpImpl(Bndl); } +#endif // NDEBUG + +} // namespace llvm::sandboxir From 6b048aeaf837e0e16fece94610f0871d17cefe4c Mon Sep 17 00:00:00 2001 From: Jacob Lalonde Date: Fri, 17 Jan 2025 12:00:31 -0800 Subject: [PATCH 55/88] [LLDB] Add SBProgress so Python scripts can also report progress (#119052) Recently I've been working on a lot of internal Python tooling, and in certain cases I want to report async to the script over DAP. Progress.h already handles this, so I've exposed Progress via the SB API so Python scripts can also update progress objects. I actually have no idea how to test this, so I just wrote a [toy command to test it](https://gist.github.com/Jlalond/48d85e75a91f7a137e3142e6a13d0947) ![image](https://github.com/user-attachments/assets/7317cbb8-9145-4fdb-bacf-9864bf50c467) I also copied the first section of the extensive Progress.h class documentation to the docstrings. 
--- lldb/bindings/headers.swig | 1 + .../bindings/interface/SBProgressDocstrings.i | 14 ++++ lldb/bindings/interfaces.swig | 2 + lldb/include/lldb/API/SBDebugger.h | 3 +- lldb/include/lldb/API/SBProgress.h | 66 +++++++++++++++++++ lldb/include/lldb/lldb-forward.h | 1 + lldb/source/API/CMakeLists.txt | 1 + lldb/source/API/SBProgress.cpp | 43 ++++++++++++ lldb/source/Core/Debugger.cpp | 3 +- .../python_api/sbprogress/TestSBProgress.py | 35 ++++++++++ 10 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 lldb/bindings/interface/SBProgressDocstrings.i create mode 100644 lldb/include/lldb/API/SBProgress.h create mode 100644 lldb/source/API/SBProgress.cpp create mode 100644 lldb/test/API/python_api/sbprogress/TestSBProgress.py diff --git a/lldb/bindings/headers.swig b/lldb/bindings/headers.swig index c0dde905f986b..5e7c54d1eb839 100644 --- a/lldb/bindings/headers.swig +++ b/lldb/bindings/headers.swig @@ -52,6 +52,7 @@ #include "lldb/API/SBProcess.h" #include "lldb/API/SBProcessInfo.h" #include "lldb/API/SBProcessInfoList.h" +#include "lldb/API/SBProgress.h" #include "lldb/API/SBQueue.h" #include "lldb/API/SBQueueItem.h" #include "lldb/API/SBReproducer.h" diff --git a/lldb/bindings/interface/SBProgressDocstrings.i b/lldb/bindings/interface/SBProgressDocstrings.i new file mode 100644 index 0000000000000..2997fe619fcc7 --- /dev/null +++ b/lldb/bindings/interface/SBProgressDocstrings.i @@ -0,0 +1,14 @@ +%feature("docstring", +"A Progress indicator helper class. + +Any potentially long running sections of code in LLDB should report +progress so that clients are aware of delays that might appear during +debugging. Delays commonly include indexing debug information, parsing +symbol tables for object files, downloading symbols from remote +repositories, and many more things. 
+ +The Progress class helps make sure that progress is correctly reported +and will always send an initial progress update, updates when +Progress::Increment() is called, and also will make sure that a progress +completed update is reported even if the user doesn't explicitly cause one +to be sent.") lldb::SBProgress; diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index 8a6fed95f0b72..08df9a1a8d539 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -54,6 +54,7 @@ %include "./interface/SBPlatformDocstrings.i" %include "./interface/SBProcessDocstrings.i" %include "./interface/SBProcessInfoDocstrings.i" +%include "./interface/SBProgressDocstrings.i" %include "./interface/SBQueueDocstrings.i" %include "./interface/SBQueueItemDocstrings.i" %include "./interface/SBReproducerDocstrings.i" @@ -133,6 +134,7 @@ %include "lldb/API/SBProcess.h" %include "lldb/API/SBProcessInfo.h" %include "lldb/API/SBProcessInfoList.h" +%include "lldb/API/SBProgress.h" %include "lldb/API/SBQueue.h" %include "lldb/API/SBQueueItem.h" %include "lldb/API/SBReproducer.h" diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 787bd040dd15b..eb371e33c4951 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -203,7 +203,7 @@ class LLDB_API SBDebugger { lldb::SBCommandInterpreter GetCommandInterpreter(); void HandleCommand(const char *command); - + void RequestInterrupt(); void CancelInterruptRequest(); bool InterruptRequested(); @@ -517,6 +517,7 @@ class LLDB_API SBDebugger { friend class SBPlatform; friend class SBTarget; friend class SBTrace; + friend class SBProgress; lldb::SBTarget FindTargetWithLLDBProcess(const lldb::ProcessSP &processSP); diff --git a/lldb/include/lldb/API/SBProgress.h b/lldb/include/lldb/API/SBProgress.h new file mode 100644 index 0000000000000..d2eaf0a743cb3 --- /dev/null +++ b/lldb/include/lldb/API/SBProgress.h @@ -0,0 +1,66 @@ +//===-- 
SBProgress.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBPROGRESS_H +#define LLDB_API_SBPROGRESS_H + +#include "lldb/API/SBDebugger.h" +#include "lldb/API/SBDefines.h" + +namespace lldb { + +/// A Progress indicator helper class. +/// +/// Any potentially long running sections of code in LLDB should report +/// progress so that clients are aware of delays that might appear during +/// debugging. Delays commonly include indexing debug information, parsing +/// symbol tables for object files, downloading symbols from remote +/// repositories, and many more things. +/// +/// The Progress class helps make sure that progress is correctly reported +/// and will always send an initial progress update, updates when +/// Progress::Increment() is called, and also will make sure that a progress +/// completed update is reported even if the user doesn't explicitly cause one +/// to be sent. +class LLDB_API SBProgress { +public: + /// Construct a progress object with a title, details and a given debugger. + /// \param title + /// The title of the progress object. + /// \param details + /// The details of the progress object. + /// \param debugger + /// The debugger for this progress object to report to. + SBProgress(const char *title, const char *details, SBDebugger &debugger); + + /// Construct a progress object with a title, details, the total units of work + /// to be done, and a given debugger. + /// \param title + /// The title of the progress object. + /// \param details + /// The details of the progress object. + /// \param total_units + /// The total number of units of work to be done. 
+ /// \param debugger + /// The debugger for this progress object to report to. + SBProgress(const char *title, const char *details, uint64_t total_units, + SBDebugger &debugger); + + ~SBProgress(); + + void Increment(uint64_t amount, const char *description = nullptr); + +protected: + lldb_private::Progress &ref() const; + +private: + std::unique_ptr m_opaque_up; +}; // SBProgress +} // namespace lldb + +#endif // LLDB_API_SBPROGRESS_H diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index d09edeeccaff1..fc7456a4b9a32 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -233,6 +233,7 @@ class Symtab; class SyntheticChildren; class SyntheticChildrenFrontEnd; class SystemRuntime; +class Progress; class Target; class TargetList; class TargetProperties; diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index d8308841c05db..147b30f3b0026 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -83,6 +83,7 @@ add_lldb_library(liblldb SHARED ${option_framework} SBModule.cpp SBModuleSpec.cpp SBPlatform.cpp + SBProgress.cpp SBProcess.cpp SBProcessInfo.cpp SBProcessInfoList.cpp diff --git a/lldb/source/API/SBProgress.cpp b/lldb/source/API/SBProgress.cpp new file mode 100644 index 0000000000000..d6ed5f0d15fc9 --- /dev/null +++ b/lldb/source/API/SBProgress.cpp @@ -0,0 +1,43 @@ +//===-- SBProgress.cpp --------------------------------------------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBProgress.h" +#include "lldb/Core/Progress.h" +#include "lldb/Utility/Instrumentation.h" + +using namespace lldb; + +SBProgress::SBProgress(const char *title, const char *details, + SBDebugger &debugger) { + LLDB_INSTRUMENT_VA(this, title, details, debugger); + + m_opaque_up = std::make_unique( + title, details, /*total=*/std::nullopt, debugger.get(), + /*minimum_report_time=*/std::nullopt, + lldb_private::Progress::Origin::eExternal); +} + +SBProgress::SBProgress(const char *title, const char *details, + uint64_t total_units, SBDebugger &debugger) { + LLDB_INSTRUMENT_VA(this, title, details, total_units, debugger); + + m_opaque_up = std::make_unique( + title, details, total_units, debugger.get(), + /*minimum_report_time=*/std::nullopt, + lldb_private::Progress::Origin::eExternal); +} + +SBProgress::~SBProgress() = default; + +void SBProgress::Increment(uint64_t amount, const char *description) { + LLDB_INSTRUMENT_VA(amount, description); + + m_opaque_up->Increment(amount, description); +} + +lldb_private::Progress &SBProgress::ref() const { return *m_opaque_up; } diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp index 6ceb209269c9e..2df2aeb20aa26 100644 --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -1952,7 +1952,8 @@ lldb::thread_result_t Debugger::DefaultEventHandler() { listener_sp->StartListeningForEvents( &m_broadcaster, lldb::eBroadcastBitProgress | lldb::eBroadcastBitWarning | lldb::eBroadcastBitError | - lldb::eBroadcastSymbolChange); + lldb::eBroadcastSymbolChange | + lldb::eBroadcastBitExternalProgress); // Let the thread that spawned us know that we have started up and that we // are now listening to all required events so no events get missed diff --git a/lldb/test/API/python_api/sbprogress/TestSBProgress.py 
b/lldb/test/API/python_api/sbprogress/TestSBProgress.py new file mode 100644 index 0000000000000..c456247da80c6 --- /dev/null +++ b/lldb/test/API/python_api/sbprogress/TestSBProgress.py @@ -0,0 +1,35 @@ +"""Test the SBProgress API.""" + +import lldb +from lldbsuite.test.lldbtest import * + + +class SBProgressTestCase(TestBase): + def test_with_external_bit_set(self): + """Test SBProgress events are listened to when the external bit is set.""" + + progress = lldb.SBProgress("Test SBProgress", "Test progress", self.dbg) + listener = lldb.SBListener("Test listener") + broadcaster = self.dbg.GetBroadcaster() + broadcaster.AddListener(listener, lldb.eBroadcastBitExternalProgress) + event = lldb.SBEvent() + + expected_string = "Test progress first increment" + progress.Increment(1, expected_string) + self.assertTrue(listener.PeekAtNextEvent(event)) + stream = lldb.SBStream() + event.GetDescription(stream) + self.assertIn(expected_string, stream.GetData()) + + def test_without_external_bit_set(self): + """Test SBProgress events are not listened to on the internal progress bit.""" + + progress = lldb.SBProgress("Test SBProgress", "Test progress", self.dbg) + listener = lldb.SBListener("Test listener") + broadcaster = self.dbg.GetBroadcaster() + broadcaster.AddListener(listener, lldb.eBroadcastBitProgress) + event = lldb.SBEvent() + + expected_string = "Test progress first increment" + progress.Increment(1, expected_string) + self.assertFalse(listener.PeekAtNextEvent(event)) From a807b2feb8bd2bcb4c611599d52d287c443c7e0b Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 17 Jan 2025 20:01:11 +0000 Subject: [PATCH 56/88] [gn build] Port 128e2e446e90 --- llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn index dab25dd6ad6d2..f59b6446f0dea 100644 --- 
a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn @@ -23,6 +23,7 @@ static_library("Vectorize") { "SandboxVectorizer/SandboxVectorizerPassBuilder.cpp", "SandboxVectorizer/Scheduler.cpp", "SandboxVectorizer/SeedCollector.cpp", + "SandboxVectorizer/VecUtils.cpp", "VPlan.cpp", "VPlanAnalysis.cpp", "VPlanHCFGBuilder.cpp", From 580ba2eed29a18c75727e1cad910b8cfb4cfca59 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 17 Jan 2025 20:01:12 +0000 Subject: [PATCH 57/88] [gn build] Port 6b048aeaf837 --- llvm/utils/gn/secondary/lldb/source/API/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn index fae0c22710b06..5e833cae6a4b1 100644 --- a/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn @@ -84,6 +84,7 @@ target(liblldb_type, "liblldb") { "SBProcess.cpp", "SBProcessInfo.cpp", "SBProcessInfoList.cpp", + "SBProgress.cpp", "SBQueue.cpp", "SBQueueItem.cpp", "SBReproducer.cpp", From 71ff486bee1b089c78f5b8175fef16f99fcebe19 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Fri, 17 Jan 2025 12:09:44 -0800 Subject: [PATCH 58/88] Reland "[flang] Inline hlfir.dot_product. (#123143)" (#123385) This reverts commit afc43a7b626ae07f56e6534320e0b46d26070750. +Fixed declaration of hlfir::genExtentsVector(). Some good results for induct2, where dot_product is applied to a vector of unknown size and a known 3-element vector: the inlining ends up generating a 3-iteration loop, which is then fully unrolled. With late FIR simplification it is not happening even when the simplified intrinsics implementation is inlined by LLVM (because the loop bounds are not known). This change just follows the current approach to expose the loops for later worksharing application.
--- .../flang/Optimizer/Builder/HLFIRTools.h | 6 + flang/lib/Optimizer/Builder/HLFIRTools.cpp | 12 + .../Transforms/SimplifyHLFIRIntrinsics.cpp | 279 ++++++++++-------- .../simplify-hlfir-intrinsics-dotproduct.fir | 144 +++++++++ 4 files changed, 326 insertions(+), 115 deletions(-) create mode 100644 flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6e85b8f4ddf86..0684ad0f926ec 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -513,6 +513,12 @@ genTypeAndKindConvert(mlir::Location loc, fir::FirOpBuilder &builder, Entity loadElementAt(mlir::Location loc, fir::FirOpBuilder &builder, Entity entity, mlir::ValueRange oneBasedIndices); +/// Return a vector of extents for the given entity. +/// The function creates new operations, but tries to clean-up +/// after itself. +llvm::SmallVector +genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, Entity entity); + } // namespace hlfir #endif // FORTRAN_OPTIMIZER_BUILDER_HLFIRTOOLS_H diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 5e5d0bbd68132..f71adf123511d 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -1421,3 +1421,15 @@ hlfir::Entity hlfir::loadElementAt(mlir::Location loc, return loadTrivialScalar(loc, builder, getElementAt(loc, builder, entity, oneBasedIndices)); } + +llvm::SmallVector +hlfir::genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity entity) { + entity = hlfir::derefPointersAndAllocatables(loc, builder, entity); + mlir::Value shape = hlfir::genShape(loc, builder, entity); + llvm::SmallVector extents = + hlfir::getExplicitExtentsFromShape(shape, builder); + if (shape.getUses().empty()) + shape.getDefiningOp()->erase(); + return extents; +} diff --git 
a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp index 0fe3620b7f1ae..fe7ae0eeed3cc 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp @@ -37,6 +37,79 @@ static llvm::cl::opt forceMatmulAsElemental( namespace { +// Helper class to generate operations related to computing +// product of values. +class ProductFactory { +public: + ProductFactory(mlir::Location loc, fir::FirOpBuilder &builder) + : loc(loc), builder(builder) {} + + // Generate an update of the inner product value: + // acc += v1 * v2, OR + // acc += CONJ(v1) * v2, OR + // acc ||= v1 && v2 + // + // CONJ parameter specifies whether the first complex product argument + // needs to be conjugated. + template + mlir::Value genAccumulateProduct(mlir::Value acc, mlir::Value v1, + mlir::Value v2) { + mlir::Type resultType = acc.getType(); + acc = castToProductType(acc, resultType); + v1 = castToProductType(v1, resultType); + v2 = castToProductType(v2, resultType); + mlir::Value result; + if (mlir::isa(resultType)) { + result = builder.create( + loc, acc, builder.create(loc, v1, v2)); + } else if (mlir::isa(resultType)) { + if constexpr (CONJ) + result = fir::IntrinsicLibrary{builder, loc}.genConjg(resultType, v1); + else + result = v1; + + result = builder.create( + loc, acc, builder.create(loc, result, v2)); + } else if (mlir::isa(resultType)) { + result = builder.create( + loc, acc, builder.create(loc, v1, v2)); + } else if (mlir::isa(resultType)) { + result = builder.create( + loc, acc, builder.create(loc, v1, v2)); + } else { + llvm_unreachable("unsupported type"); + } + + return builder.createConvert(loc, resultType, result); + } + +private: + mlir::Location loc; + fir::FirOpBuilder &builder; + + mlir::Value castToProductType(mlir::Value value, mlir::Type type) { + if (mlir::isa(type)) + return builder.createConvert(loc, 
builder.getIntegerType(1), value); + + // TODO: the multiplications/additions by/of zero resulting from + // complex * real are optimized by LLVM under -fno-signed-zeros + // -fno-honor-nans. + // We can make them disappear by default if we: + // * either expand the complex multiplication into real + // operations, OR + // * set nnan nsz fast-math flags to the complex operations. + if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { + mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); + fir::factory::Complex helper(builder, loc); + mlir::Type partType = helper.getComplexPartType(type); + return helper.insertComplexPart(zeroCmplx, + castToProductType(value, partType), + /*isImagPart=*/false); + } + return builder.createConvert(loc, type, value); + } +}; + class TransposeAsElementalConversion : public mlir::OpRewritePattern { public: @@ -90,11 +163,8 @@ class TransposeAsElementalConversion static mlir::Value genResultShape(mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity array) { - mlir::Value inShape = hlfir::genShape(loc, builder, array); - llvm::SmallVector inExtents = - hlfir::getExplicitExtentsFromShape(inShape, builder); - if (inShape.getUses().empty()) - inShape.getDefiningOp()->erase(); + llvm::SmallVector inExtents = + hlfir::genExtentsVector(loc, builder, array); // transpose indices assert(inExtents.size() == 2 && "checked in TransposeOp::validate"); @@ -137,7 +207,7 @@ class SumAsElementalConversion : public mlir::OpRewritePattern { mlir::Value resultShape, dimExtent; llvm::SmallVector arrayExtents; if (isTotalReduction) - arrayExtents = genArrayExtents(loc, builder, array); + arrayExtents = hlfir::genExtentsVector(loc, builder, array); else std::tie(resultShape, dimExtent) = genResultShapeForPartialReduction(loc, builder, array, dimVal); @@ -163,7 +233,8 @@ class SumAsElementalConversion : public mlir::OpRewritePattern { // If DIM is not present, do total reduction. // Initial value for the reduction. 
- mlir::Value reductionInitValue = genInitValue(loc, builder, elementType); + mlir::Value reductionInitValue = + fir::factory::createZeroValue(builder, loc, elementType); // The reduction loop may be unordered if FastMathFlags::reassoc // transformations are allowed. The integer reduction is always @@ -264,17 +335,6 @@ class SumAsElementalConversion : public mlir::OpRewritePattern { } private: - static llvm::SmallVector - genArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder, - hlfir::Entity array) { - mlir::Value inShape = hlfir::genShape(loc, builder, array); - llvm::SmallVector inExtents = - hlfir::getExplicitExtentsFromShape(inShape, builder); - if (inShape.getUses().empty()) - inShape.getDefiningOp()->erase(); - return inExtents; - } - // Return fir.shape specifying the shape of the result // of a SUM reduction with DIM=dimVal. The second return value // is the extent of the DIM dimension. @@ -283,7 +343,7 @@ class SumAsElementalConversion : public mlir::OpRewritePattern { fir::FirOpBuilder &builder, hlfir::Entity array, int64_t dimVal) { llvm::SmallVector inExtents = - genArrayExtents(loc, builder, array); + hlfir::genExtentsVector(loc, builder, array); assert(dimVal > 0 && dimVal <= static_cast(inExtents.size()) && "DIM must be present and a positive constant not exceeding " "the array's rank"); @@ -293,26 +353,6 @@ class SumAsElementalConversion : public mlir::OpRewritePattern { return {builder.create(loc, inExtents), dimExtent}; } - // Generate the initial value for a SUM reduction with the given - // data type. 
- static mlir::Value genInitValue(mlir::Location loc, - fir::FirOpBuilder &builder, - mlir::Type elementType) { - if (auto ty = mlir::dyn_cast(elementType)) { - const llvm::fltSemantics &sem = ty.getFloatSemantics(); - return builder.createRealConstant(loc, elementType, - llvm::APFloat::getZero(sem)); - } else if (auto ty = mlir::dyn_cast(elementType)) { - mlir::Value initValue = genInitValue(loc, builder, ty.getElementType()); - return fir::factory::Complex{builder, loc}.createComplex(ty, initValue, - initValue); - } else if (mlir::isa(elementType)) { - return builder.createIntegerConstant(loc, elementType, 0); - } - - llvm_unreachable("unsupported SUM reduction type"); - } - // Generate scalar addition of the two values (of the same data type). static mlir::Value genScalarAdd(mlir::Location loc, fir::FirOpBuilder &builder, @@ -570,16 +610,10 @@ class MatmulConversion : public mlir::OpRewritePattern { static std::tuple genResultShape(mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity input1, hlfir::Entity input2) { - mlir::Value input1Shape = hlfir::genShape(loc, builder, input1); - llvm::SmallVector input1Extents = - hlfir::getExplicitExtentsFromShape(input1Shape, builder); - if (input1Shape.getUses().empty()) - input1Shape.getDefiningOp()->erase(); - mlir::Value input2Shape = hlfir::genShape(loc, builder, input2); - llvm::SmallVector input2Extents = - hlfir::getExplicitExtentsFromShape(input2Shape, builder); - if (input2Shape.getUses().empty()) - input2Shape.getDefiningOp()->erase(); + llvm::SmallVector input1Extents = + hlfir::genExtentsVector(loc, builder, input1); + llvm::SmallVector input2Extents = + hlfir::genExtentsVector(loc, builder, input2); llvm::SmallVector newExtents; mlir::Value innerProduct1Extent, innerProduct2Extent; @@ -627,60 +661,6 @@ class MatmulConversion : public mlir::OpRewritePattern { innerProductExtent[0]}; } - static mlir::Value castToProductType(mlir::Location loc, - fir::FirOpBuilder &builder, - mlir::Value value, 
mlir::Type type) { - if (mlir::isa(type)) - return builder.createConvert(loc, builder.getIntegerType(1), value); - - // TODO: the multiplications/additions by/of zero resulting from - // complex * real are optimized by LLVM under -fno-signed-zeros - // -fno-honor-nans. - // We can make them disappear by default if we: - // * either expand the complex multiplication into real - // operations, OR - // * set nnan nsz fast-math flags to the complex operations. - if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { - mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); - fir::factory::Complex helper(builder, loc); - mlir::Type partType = helper.getComplexPartType(type); - return helper.insertComplexPart( - zeroCmplx, castToProductType(loc, builder, value, partType), - /*isImagPart=*/false); - } - return builder.createConvert(loc, type, value); - } - - // Generate an update of the inner product value: - // acc += v1 * v2, OR - // acc ||= v1 && v2 - static mlir::Value genAccumulateProduct(mlir::Location loc, - fir::FirOpBuilder &builder, - mlir::Type resultType, - mlir::Value acc, mlir::Value v1, - mlir::Value v2) { - acc = castToProductType(loc, builder, acc, resultType); - v1 = castToProductType(loc, builder, v1, resultType); - v2 = castToProductType(loc, builder, v2, resultType); - mlir::Value result; - if (mlir::isa(resultType)) - result = builder.create( - loc, acc, builder.create(loc, v1, v2)); - else if (mlir::isa(resultType)) - result = builder.create( - loc, acc, builder.create(loc, v1, v2)); - else if (mlir::isa(resultType)) - result = builder.create( - loc, acc, builder.create(loc, v1, v2)); - else if (mlir::isa(resultType)) - result = builder.create( - loc, acc, builder.create(loc, v1, v2)); - else - llvm_unreachable("unsupported type"); - - return builder.createConvert(loc, resultType, result); - } - static mlir::LogicalResult genContiguousMatmul(mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity result, 
mlir::Value resultShape, @@ -748,9 +728,9 @@ class MatmulConversion : public mlir::OpRewritePattern { hlfir::loadElementAt(loc, builder, lhs, {I, K}); hlfir::Entity rhsElementValue = hlfir::loadElementAt(loc, builder, rhs, {K, J}); - mlir::Value productValue = genAccumulateProduct( - loc, builder, resultElementType, resultElementValue, - lhsElementValue, rhsElementValue); + mlir::Value productValue = + ProductFactory{loc, builder}.genAccumulateProduct( + resultElementValue, lhsElementValue, rhsElementValue); builder.create(loc, productValue, resultElement); return {}; }; @@ -785,9 +765,9 @@ class MatmulConversion : public mlir::OpRewritePattern { hlfir::loadElementAt(loc, builder, lhs, {J, K}); hlfir::Entity rhsElementValue = hlfir::loadElementAt(loc, builder, rhs, {K}); - mlir::Value productValue = genAccumulateProduct( - loc, builder, resultElementType, resultElementValue, - lhsElementValue, rhsElementValue); + mlir::Value productValue = + ProductFactory{loc, builder}.genAccumulateProduct( + resultElementValue, lhsElementValue, rhsElementValue); builder.create(loc, productValue, resultElement); return {}; }; @@ -817,9 +797,9 @@ class MatmulConversion : public mlir::OpRewritePattern { hlfir::loadElementAt(loc, builder, lhs, {K}); hlfir::Entity rhsElementValue = hlfir::loadElementAt(loc, builder, rhs, {K, J}); - mlir::Value productValue = genAccumulateProduct( - loc, builder, resultElementType, resultElementValue, - lhsElementValue, rhsElementValue); + mlir::Value productValue = + ProductFactory{loc, builder}.genAccumulateProduct( + resultElementValue, lhsElementValue, rhsElementValue); builder.create(loc, productValue, resultElement); return {}; }; @@ -885,9 +865,9 @@ class MatmulConversion : public mlir::OpRewritePattern { hlfir::loadElementAt(loc, builder, lhs, lhsIndices); hlfir::Entity rhsElementValue = hlfir::loadElementAt(loc, builder, rhs, rhsIndices); - mlir::Value productValue = genAccumulateProduct( - loc, builder, resultElementType, reductionArgs[0], 
lhsElementValue, - rhsElementValue); + mlir::Value productValue = + ProductFactory{loc, builder}.genAccumulateProduct( + reductionArgs[0], lhsElementValue, rhsElementValue); return {productValue}; }; llvm::SmallVector innerProductValue = @@ -904,6 +884,73 @@ class MatmulConversion : public mlir::OpRewritePattern { } }; +class DotProductConversion + : public mlir::OpRewritePattern { +public: + using mlir::OpRewritePattern::OpRewritePattern; + + llvm::LogicalResult + matchAndRewrite(hlfir::DotProductOp product, + mlir::PatternRewriter &rewriter) const override { + hlfir::Entity op = hlfir::Entity{product}; + if (!op.isScalar()) + return rewriter.notifyMatchFailure(product, "produces non-scalar result"); + + mlir::Location loc = product.getLoc(); + fir::FirOpBuilder builder{rewriter, product.getOperation()}; + hlfir::Entity lhs = hlfir::Entity{product.getLhs()}; + hlfir::Entity rhs = hlfir::Entity{product.getRhs()}; + mlir::Type resultElementType = product.getType(); + bool isUnordered = mlir::isa(resultElementType) || + mlir::isa(resultElementType) || + static_cast(builder.getFastMathFlags() & + mlir::arith::FastMathFlags::reassoc); + + mlir::Value extent = genProductExtent(loc, builder, lhs, rhs); + + auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder, + mlir::ValueRange oneBasedIndices, + mlir::ValueRange reductionArgs) + -> llvm::SmallVector { + hlfir::Entity lhsElementValue = + hlfir::loadElementAt(loc, builder, lhs, oneBasedIndices); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, oneBasedIndices); + mlir::Value productValue = + ProductFactory{loc, builder}.genAccumulateProduct( + reductionArgs[0], lhsElementValue, rhsElementValue); + return {productValue}; + }; + + mlir::Value initValue = + fir::factory::createZeroValue(builder, loc, resultElementType); + + llvm::SmallVector result = hlfir::genLoopNestWithReductions( + loc, builder, {extent}, + /*reductionInits=*/{initValue}, genBody, isUnordered); + + 
rewriter.replaceOp(product, result[0]); + return mlir::success(); + } + +private: + static mlir::Value genProductExtent(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::Entity input1, + hlfir::Entity input2) { + llvm::SmallVector input1Extents = + hlfir::genExtentsVector(loc, builder, input1); + llvm::SmallVector input2Extents = + hlfir::genExtentsVector(loc, builder, input2); + + assert(input1Extents.size() == 1 && input2Extents.size() == 1 && + "hlfir.dot_product arguments must be vectors"); + llvm::SmallVector extent = + fir::factory::deduceOptimalExtents(input1Extents, input2Extents); + return extent[0]; + } +}; + class SimplifyHLFIRIntrinsics : public hlfir::impl::SimplifyHLFIRIntrinsicsBase { public: @@ -939,6 +986,8 @@ class SimplifyHLFIRIntrinsics if (forceMatmulAsElemental || this->allowNewSideEffects) patterns.insert>(context); + patterns.insert(context); + if (mlir::failed(mlir::applyPatternsGreedily( getOperation(), std::move(patterns), config))) { mlir::emitError(getOperation()->getLoc(), diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir new file mode 100644 index 0000000000000..f59b1422dbc84 --- /dev/null +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir @@ -0,0 +1,144 @@ +// Test hlfir.dot_product simplification to a reduction loop: +// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s + +func.func @dot_product_integer(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> i32 { + %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> i32 + return %res : i32 +} +// CHECK-LABEL: func.func @dot_product_integer( +// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> i32 { +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = hlfir.get_extent 
%[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (i32) { +// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> i16 +// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> i32 +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32 +// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_11]], %[[VAL_10]] : i32 +// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_12]] : i32 +// CHECK: fir.result %[[VAL_13]] : i32 +// CHECK: } +// CHECK: return %[[VAL_6]] : i32 +// CHECK: } + +func.func @dot_product_real(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> f32 { + %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> f32 + return %res : f32 +} +// CHECK-LABEL: func.func @dot_product_real( +// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> f32 { +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (f32) { +// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> f32 +// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> f16 +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (f16) -> f32 +// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_9]], %[[VAL_11]] : f32 +// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_12]] : f32 +// CHECK: fir.result %[[VAL_13]] : f32 +// CHECK: } +// CHECK: return %[[VAL_6]] : f32 +// CHECK: } + +func.func @dot_product_complex(%arg0: 
!hlfir.expr>, %arg1: !hlfir.expr>) -> complex { + %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> complex + return %res : complex +} +// CHECK-LABEL: func.func @dot_product_complex( +// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, +// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +// CHECK: %[[VAL_6:.*]] = fir.undefined complex +// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { +// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex +// CHECK: %[[VAL_15:.*]] = fir.extract_value %[[VAL_12]], [1 : index] : (complex) -> f32 +// CHECK: %[[VAL_16:.*]] = arith.negf %[[VAL_15]] : f32 +// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_12]], %[[VAL_16]], [1 : index] : (complex, f32) -> complex +// CHECK: %[[VAL_18:.*]] = fir.mulc %[[VAL_17]], %[[VAL_14]] : complex +// CHECK: %[[VAL_19:.*]] = fir.addc %[[VAL_11]], %[[VAL_18]] : complex +// CHECK: fir.result %[[VAL_19]] : complex +// CHECK: } +// CHECK: return %[[VAL_9]] : complex +// CHECK: } + +func.func @dot_product_real_complex(%arg0: !hlfir.expr, %arg1: !hlfir.expr>) -> complex { + %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr>) -> complex + return %res : 
complex +} +// CHECK-LABEL: func.func @dot_product_real_complex( +// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +// CHECK: %[[VAL_6:.*]] = fir.undefined complex +// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { +// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr, index) -> f32 +// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +// CHECK: %[[VAL_14:.*]] = fir.undefined complex +// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_16]], %[[VAL_12]], [0 : index] : (complex, f32) -> complex +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex +// CHECK: %[[VAL_19:.*]] = fir.extract_value %[[VAL_17]], [1 : index] : (complex) -> f32 +// CHECK: %[[VAL_20:.*]] = arith.negf %[[VAL_19]] : f32 +// CHECK: %[[VAL_21:.*]] = fir.insert_value %[[VAL_17]], %[[VAL_20]], [1 : index] : (complex, f32) -> complex +// CHECK: %[[VAL_22:.*]] = fir.mulc %[[VAL_21]], %[[VAL_18]] : complex +// CHECK: %[[VAL_23:.*]] = fir.addc %[[VAL_11]], %[[VAL_22]] : complex +// CHECK: fir.result %[[VAL_23]] : complex +// CHECK: } +// 
CHECK: return %[[VAL_9]] : complex +// CHECK: } + +func.func @dot_product_logical(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> !fir.logical<4> { + %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> !fir.logical<4> + return %res : !fir.logical<4> +} +// CHECK-LABEL: func.func @dot_product_logical( +// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, +// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> !fir.logical<4> { +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant false +// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4> +// CHECK: %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_6]]) -> (!fir.logical<4>) { +// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<1> +// CHECK: %[[VAL_11:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<4> +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_9]] : (!fir.logical<4>) -> i1 +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (!fir.logical<1>) -> i1 +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 +// CHECK: %[[VAL_15:.*]] = arith.andi %[[VAL_13]], %[[VAL_14]] : i1 +// CHECK: %[[VAL_16:.*]] = arith.ori %[[VAL_12]], %[[VAL_15]] : i1 +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4> +// CHECK: fir.result %[[VAL_17]] : !fir.logical<4> +// CHECK: } +// CHECK: return %[[VAL_7]] : !fir.logical<4> +// CHECK: } + +func.func @dot_product_known_dim(%arg0: !hlfir.expr<10xf32>, %arg1: !hlfir.expr) -> f32 { + %res1 = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr<10xf32>, !hlfir.expr) -> f32 + %res2 = hlfir.dot_product %arg1 %arg0 : (!hlfir.expr, !hlfir.expr<10xf32>) 
-> f32 + %res = arith.addf %res1, %res2 : f32 + return %res : f32 +} +// CHECK-LABEL: func.func @dot_product_known_dim( +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 10 : index +// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] +// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] From ce6e66ddecbd1ddfa3be9be2ac881931d5ae71a4 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 17 Jan 2025 12:08:22 -0800 Subject: [PATCH 59/88] [RISCV] Add coverage for an upcoming select lowering change A select between an add and a sub can be either a vrsub followed by add (reducing register pressure), or a vmacc. The former will be implemented in an upcoming review. --- .../RISCV/rvv/fixed-vectors-select-addsub.ll | 351 ++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll new file mode 100644 index 0000000000000..008c39df70839 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll @@ -0,0 +1,351 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + + +define <1 x i32> @select_addsub_v1i32(<1 x i1> %cc, <1 x i32> %a, <1 x i32> %b) { +; CHECK-LABEL: select_addsub_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <1 x i32> %a, %b + %sub = add nsw <1 x i32> %a, %b + %res = select <1 x i1> %cc, <1 x i32> %add, <1 x i32> %sub + ret <1 x i32> %res +} + +define <2 x i32> 
@select_addsub_v2i32(<2 x i1> %cc, <2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: select_addsub_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <2 x i32> %a, %b + %sub = add nsw <2 x i32> %a, %b + %res = select <2 x i1> %cc, <2 x i32> %add, <2 x i32> %sub + ret <2 x i32> %res +} + +define <4 x i32> @select_addsub_v4i32(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: select_addsub_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <4 x i32> %a, %b + %sub = add nsw <4 x i32> %a, %b + %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub + ret <4 x i32> %res +} + +define <4 x i32> @select_addsub_v4i32_select_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: select_addsub_v4i32_select_swapped: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsub.vv v10, v8, v9 +; CHECK-NEXT: vadd.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <4 x i32> %a, %b + %sub = add nsw <4 x i32> %a, %b + %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add + ret <4 x i32> %res +} + +define <4 x i32> @select_addsub_v4i32_add_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: select_addsub_v4i32_add_swapped: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vadd.vv v10, v9, v8 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <4 x i32> %a, %b + %sub = add nsw <4 x i32> %b, %a + %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub + ret <4 x i32> %res +} + +define <4 x i32> @select_addsub_v4i32_both_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x 
i32> %b) { +; CHECK-LABEL: select_addsub_v4i32_both_swapped: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsub.vv v10, v8, v9 +; CHECK-NEXT: vadd.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <4 x i32> %a, %b + %sub = add nsw <4 x i32> %b, %a + %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add + ret <4 x i32> %res +} + +define <4 x i32> @select_addsub_v4i32_sub_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: select_addsub_v4i32_sub_swapped: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vadd.vv v10, v9, v8 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <4 x i32> %a, %b + %sub = add nsw <4 x i32> %b, %a + %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub + ret <4 x i32> %res +} + +define <8 x i32> @select_addsub_v8i32(<8 x i1> %cc, <8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: select_addsub_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vadd.vv v12, v8, v10 +; CHECK-NEXT: vsub.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %add = sub nsw <8 x i32> %a, %b + %sub = add nsw <8 x i32> %a, %b + %res = select <8 x i1> %cc, <8 x i32> %add, <8 x i32> %sub + ret <8 x i32> %res +} + +define <16 x i32> @select_addsub_v16i32(<16 x i1> %cc, <16 x i32> %a, <16 x i32> %b) { +; CHECK-LABEL: select_addsub_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vadd.vv v16, v8, v12 +; CHECK-NEXT: vsub.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %add = sub nsw <16 x i32> %a, %b + %sub = add nsw <16 x i32> %a, %b + %res = select <16 x i1> %cc, <16 x i32> %add, <16 x i32> %sub + ret <16 x i32> %res +} + +define <32 x i32> @select_addsub_v32i32(<32 x i1> %cc, <32 x i32> %a, <32 x i32> %b) { +; CHECK-LABEL: select_addsub_v32i32: +; CHECK: # 
%bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vadd.vv v24, v8, v16 +; CHECK-NEXT: vsub.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %add = sub nsw <32 x i32> %a, %b + %sub = add nsw <32 x i32> %a, %b + %res = select <32 x i1> %cc, <32 x i32> %add, <32 x i32> %sub + ret <32 x i32> %res +} + +define <64 x i32> @select_addsub_v64i32(<64 x i1> %cc, <64 x i32> %a, <64 x i32> %b) { +; CHECK-LABEL: select_addsub_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vadd.vv v24, v8, v16 +; CHECK-NEXT: vsub.vv v24, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vadd.vv v16, v16, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsub.vv v16, v24, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa sp, 16 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: ret + %add = sub nsw <64 x i32> %a, %b + %sub = add nsw <64 x i32> %a, %b + %res = select <64 x i1> %cc, <64 x i32> %add, <64 x i32> %sub + ret <64 x i32> %res +} + +define <8 x i64> @select_addsub_v8i64(<8 x i1> %cc, <8 x i64> %a, <8 x i64> %b) { +; CHECK-LABEL: select_addsub_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vadd.vv v16, v8, v12 +; CHECK-NEXT: vsub.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %add = sub nsw <8 x i64> %a, %b + %sub = add nsw <8 x i64> %a, %b + %res = select <8 x i1> %cc, <8 x i64> %add, <8 x i64> %sub + ret <8 x i64> %res +} + +define <8 x i16> @select_addsub_v8i16(<8 x i1> %cc, <8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: select_addsub_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <8 x i16> %a, %b + %sub = add nsw <8 x i16> %a, %b + %res = select <8 x i1> %cc, <8 x i16> %add, <8 
x i16> %sub + ret <8 x i16> %res +} + +define <8 x i8> @select_addsub_v8i8(<8 x i1> %cc, <8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: select_addsub_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <8 x i8> %a, %b + %sub = add nsw <8 x i8> %a, %b + %res = select <8 x i1> %cc, <8 x i8> %add, <8 x i8> %sub + ret <8 x i8> %res +} + +define <8 x i1> @select_addsub_v8i1(<8 x i1> %cc, <8 x i1> %a, <8 x i1> %b) { +; CHECK-LABEL: select_addsub_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmxor.mm v0, v8, v9 +; CHECK-NEXT: ret + %add = sub nsw <8 x i1> %a, %b + %sub = add nsw <8 x i1> %a, %b + %res = select <8 x i1> %cc, <8 x i1> %add, <8 x i1> %sub + ret <8 x i1> %res +} + +define <8 x i2> @select_addsub_v8i2(<8 x i1> %cc, <8 x i2> %a, <8 x i2> %b) { +; CHECK-LABEL: select_addsub_v8i2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <8 x i2> %a, %b + %sub = add nsw <8 x i2> %a, %b + %res = select <8 x i1> %cc, <8 x i2> %add, <8 x i2> %sub + ret <8 x i2> %res +} + +define <4 x i32> @select_addsub_v4i32_constmask(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: select_addsub_v4i32_constmask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.i v0, 5 +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <4 x i32> %a, %b + %sub = add nsw <4 x i32> %a, %b + %res = select <4 x i1> , <4 x i32> %add, <4 x i32> %sub + ret <4 x i32> %res +} + +define <4 x i32> @select_addsub_v4i32_constmask2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: select_addsub_v4i32_constmask2: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.i v0, 5 +; CHECK-NEXT: vadd.vv v10, v9, v8 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <4 x i32> %a, %b + %sub = add nsw <4 x i32> %b, %a + %res = select <4 x i1> , <4 x i32> %add, <4 x i32> %sub + ret <4 x i32> %res +} + +; Same pattern as above, but the select is disguised as a shuffle +define <4 x i32> @select_addsub_v4i32_as_shuffle(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: select_addsub_v4i32_as_shuffle: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.i v0, 5 +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <4 x i32> %a, %b + %sub = add nsw <4 x i32> %a, %b + %res = shufflevector <4 x i32> %add, <4 x i32> %sub, <4 x i32> + ret <4 x i32> %res +} + +; Same pattern as above, but the select is disguised as a shuffle +define <4 x i32> @select_addsub_v4i32_as_shuffle2(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: select_addsub_v4i32_as_shuffle2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.i v0, 5 +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vsub.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %add = sub nsw <4 x i32> %b, %a + %sub = add nsw <4 x i32> %a, %b + %res = shufflevector <4 x i32> %add, <4 x i32> %sub, <4 x i32> + ret <4 x i32> %res +} From 5a735a28c8bf916a4a6d9068f01d80fdf1affa8a Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Fri, 17 Jan 2025 15:24:57 -0500 Subject: [PATCH 60/88] [HLSL][Sema] Fixed Diagnostics that assumed only two arguments (#122772) In the below code B varies over the arg list via a loop. However, the diagnostics do not vary with the loop. Fix so that diagnostics can vary with B. 
--- clang/lib/Sema/SemaHLSL.cpp | 36 ++++++++++++------- clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl | 36 +++++++++++++++++-- 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 238e19651dc6b..5001883003ee2 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1688,13 +1688,21 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { auto *VecTyA = ArgTyA->getAs(); SourceLocation BuiltinLoc = TheCall->getBeginLoc(); + bool AllBArgAreVectors = true; for (unsigned i = 1; i < TheCall->getNumArgs(); ++i) { ExprResult B = TheCall->getArg(i); QualType ArgTyB = B.get()->getType(); auto *VecTyB = ArgTyB->getAs(); - if (VecTyA == nullptr && VecTyB == nullptr) - return false; - + if (VecTyB == nullptr) + AllBArgAreVectors &= false; + if (VecTyA && VecTyB == nullptr) { + // Note: if we get here 'B' is scalar which + // requires a VectorSplat on ArgN + S->Diag(BuiltinLoc, diag::err_vec_builtin_non_vector) + << TheCall->getDirectCallee() << /*useAllTerminology*/ true + << SourceRange(A.get()->getBeginLoc(), B.get()->getEndLoc()); + return true; + } if (VecTyA && VecTyB) { bool retValue = false; if (VecTyA->getElementType() != VecTyB->getElementType()) { @@ -1712,21 +1720,23 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { // HLSLVectorTruncation. 
S->Diag(BuiltinLoc, diag::err_vec_builtin_incompatible_vector) << TheCall->getDirectCallee() << /*useAllTerminology*/ true - << SourceRange(TheCall->getArg(0)->getBeginLoc(), - TheCall->getArg(1)->getEndLoc()); + << SourceRange(A.get()->getBeginLoc(), B.get()->getEndLoc()); retValue = true; } - return retValue; + if (retValue) + return retValue; } } - // Note: if we get here one of the args is a scalar which - // requires a VectorSplat on Arg0 or Arg1 - S->Diag(BuiltinLoc, diag::err_vec_builtin_non_vector) - << TheCall->getDirectCallee() << /*useAllTerminology*/ true - << SourceRange(TheCall->getArg(0)->getBeginLoc(), - TheCall->getArg(1)->getEndLoc()); - return true; + if (VecTyA == nullptr && AllBArgAreVectors) { + // Note: if we get here 'A' is a scalar which + // requires a VectorSplat on Arg0 + S->Diag(BuiltinLoc, diag::err_vec_builtin_non_vector) + << TheCall->getDirectCallee() << /*useAllTerminology*/ true + << SourceRange(A.get()->getBeginLoc(), A.get()->getEndLoc()); + return true; + } + return false; } static bool CheckArgTypeMatches(Sema *S, Expr *Arg, QualType ExpectedType) { diff --git a/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl index 56c8b32cc14e0..c77a07602b390 100644 --- a/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl @@ -20,16 +20,38 @@ float2 test_lerp_no_second_arg(float2 p0) { // expected-error@-1 {{no matching function for call to 'lerp'}} } -float2 test_lerp_vector_size_mismatch(float3 p0, float2 p1) { +float2 test_lerp_vector_trunc_warn1(float3 p0) { + return lerp(p0, p0, p0); + // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 2 'float' values)}} +} + +float2 test_lerp_vector_trunc_warn2(float3 p0, float2 p1) { return lerp(p0, p0, p1); // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 2 'float' values)}} + // 
expected-warning@-2 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 2 'float' values)}} +} + +float2 test_lerp_vector_trunc_warn3(float3 p0, float2 p1) { + return lerp(p0, p1, p0); + // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 2 'float' values)}} + // expected-warning@-2 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 2 'float' values)}} } -float2 test_lerp_builtin_vector_size_mismatch(float3 p0, float2 p1) { +float2 test_lerp_builtin_vector_size_mismatch_Arg1(float3 p0, float2 p1) { return __builtin_hlsl_lerp(p0, p1, p1); // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must have the same type}} } +float2 test_lerp_builtin_vector_size_mismatch_Arg2(float3 p0, float2 p1) { + return __builtin_hlsl_lerp(p1, p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must have the same type}} +} + +float2 test_lerp_builtin_vector_size_mismatch_Arg3(float3 p0, float2 p1) { + return __builtin_hlsl_lerp(p1, p1, p0); + // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must have the same type}} +} + float test_lerp_scalar_mismatch(float p0, half p1) { return lerp(p1, p0, p1); // expected-error@-1 {{call to 'lerp' is ambiguous}} @@ -45,6 +67,16 @@ float2 test_builtin_lerp_float2_splat(float p0, float2 p1) { // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must be vectors}} } +float2 test_builtin_lerp_float2_splat2(double p0, double2 p1) { + return __builtin_hlsl_lerp(p1, p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must be vectors}} +} + +float2 test_builtin_lerp_float2_splat3(double p0, double2 p1) { + return __builtin_hlsl_lerp(p1, p1, p0); + // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must be vectors}} +} + float3 test_builtin_lerp_float3_splat(float p0, float3 p1) { return __builtin_hlsl_lerp(p0, p1, p1); // expected-error@-1 {{all 
arguments to '__builtin_hlsl_lerp' must be vectors}} From e237e37c62804b5caa7ca5501d7372d7b01167ad Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Fri, 17 Jan 2025 12:16:22 -0800 Subject: [PATCH 61/88] [libc] Fix riscv32 Block assertion failure on #117815 --- libc/src/__support/block.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/libc/src/__support/block.h b/libc/src/__support/block.h index 50a745326eac3..a58c38bbb7acb 100644 --- a/libc/src/__support/block.h +++ b/libc/src/__support/block.h @@ -227,15 +227,17 @@ class Block { *new (&next()->prev_) size_t = outer_size(); } - /// Marks this block as the last one in the chain. Makes next() return - /// nullptr. - LIBC_INLINE void mark_last() { next_ |= LAST_MASK; } - - LIBC_INLINE Block(size_t outer_size) : next_(outer_size) { - LIBC_ASSERT(outer_size % alignof(max_align_t) == 0 && - "block sizes must be aligned"); + LIBC_INLINE Block(size_t outer_size, bool is_last) : next_(outer_size) { + // Last blocks are not usable, so they need not have sizes aligned to + // max_align_t. Their lower bits must still be free, so they must be aligned + // to Block. + LIBC_ASSERT( + outer_size % (is_last ? 
alignof(Block) : alignof(max_align_t)) == 0 && + "block sizes must be aligned"); LIBC_ASSERT(is_usable_space_aligned(alignof(max_align_t)) && "usable space must be aligned to a multiple of max_align_t"); + if (is_last) + next_ |= LAST_MASK; } LIBC_INLINE bool is_usable_space_aligned(size_t alignment) const { @@ -325,7 +327,13 @@ class Block { LIBC_ASSERT(reinterpret_cast(bytes.data()) % alignof(Block) == 0 && "block start must be suitably aligned"); - return ::new (bytes.data()) Block(bytes.size()); + return ::new (bytes.data()) Block(bytes.size(), /*is_last=*/false); + } + + LIBC_INLINE static void make_last_block(cpp::byte *start) { + LIBC_ASSERT(reinterpret_cast(start) % alignof(Block) == 0 && + "block start must be suitably aligned"); + ::new (start) Block(sizeof(Block), /*is_last=*/true); } /// Offset from this block to the previous block. 0 if this is the first @@ -353,7 +361,7 @@ class Block { static constexpr size_t PREV_FIELD_SIZE = sizeof(prev_); }; -static_assert(alignof(max_align_t) >= 4, +static_assert(alignof(Block) >= 4, "at least 2 bits must be available in block sizes for flags"); LIBC_INLINE @@ -380,9 +388,8 @@ optional Block::init(ByteSpan region) { auto *last_start_ptr = reinterpret_cast(last_start); Block *block = as_block({reinterpret_cast(block_start), last_start_ptr}); - Block *last = as_block({last_start_ptr, sizeof(Block)}); + make_last_block(last_start_ptr); block->mark_free(); - last->mark_last(); return block; } From dce5d1fcb956e0218268dc1bf3e128bd2586df77 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas Date: Fri, 17 Jan 2025 20:33:11 +0000 Subject: [PATCH 62/88] [FMV][GlobalOpt] Do not statically resolve non-FMV callers. (#123383) This fixes a runtime regression in the llvm testsuite: https://lab.llvm.org/buildbot/#/builders/198/builds/1237 On clang-aarch64-sve2-vla: predres FAIL A 'predres' version is unexpectedly trapping on GravitonG4. 
My explanation is that when the caller is not a versioned function, the compiler exclusively relies on the command line option, or target attribute to deduce whether a feature is available. However, there is no guarantee that in reality the host supports those implied features. This is a quickfix. We may rather change the mcpu option in the llvm testsuite build instead. --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 12 ++++++++++-- llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index bf0cacc6224be..eb97d8b4a74f3 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2785,8 +2785,16 @@ static bool OptimizeNonTrivialIFuncs( } else { // We can't reason much about non-FMV callers. Just pick the highest // priority callee if it matches, otherwise bail. - if (I > 0 || !implies(CallerBits, CalleeBits)) - continue; + // if (I > 0 || !implies(CallerBits, CalleeBits)) + // + // FIXME: This is causing a regression in the llvm test suite, + // specifically a 'predres' version is unexpectedly trapping on + // GravitonG4. My explanation is that when the caller in not a + // versioned function, the compiler exclusively relies on the + // command line option, or target attribute to deduce whether a + // feature is available. However, there is no guarantee that in + // reality the host supports those implied features. 
+ continue; } auto &Calls = CallSites[Caller]; for (CallBase *CS : Calls) diff --git a/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll b/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll index 4b6a19d3f05cf..fa817a8cbf417 100644 --- a/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll +++ b/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll @@ -221,7 +221,7 @@ resolver_entry: define i32 @caller4() #8 { ; CHECK-LABEL: define i32 @caller4( ; CHECK-SAME: ) local_unnamed_addr #[[ATTR7:[0-9]+]] { -; CHECK: [[CALL:%.*]] = tail call i32 @test_non_fmv_caller._Maes() +; CHECK: [[CALL:%.*]] = tail call i32 @test_non_fmv_caller() ; entry: %call = tail call i32 @test_non_fmv_caller() From 7b3a3530163f576708530dc6bcd6a0196a5423ae Mon Sep 17 00:00:00 2001 From: mishaobu <46725114+mishaobu@users.noreply.github.com> Date: Fri, 17 Jan 2025 21:34:11 +0100 Subject: [PATCH 63/88] [mlir][spirv] Add common SPIRV Extended Ops for Vectors (#122322) Support for the following SPIR-V Extended Ops: * 67: Distance * 68: Cross * 69: Normalize * 71: Reflect (Found here: https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html) --- .../mlir/Dialect/SPIRV/IR/SPIRVGLOps.td | 116 +++++++++++++++++ mlir/test/Dialect/SPIRV/IR/gl-ops.mlir | 122 ++++++++++++++++++ mlir/test/Target/SPIRV/gl-ops.mlir | 20 +++ 3 files changed, 258 insertions(+) diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVGLOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVGLOps.td index 3fcfb086f9662..1cdfa02f81787 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVGLOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVGLOps.td @@ -1029,6 +1029,122 @@ def SPIRV_GLFMixOp : let hasVerifier = 0; } +// ----- + +def SPIRV_GLDistanceOp : SPIRV_GLOp<"Distance", 67, [ + Pure, + AllTypesMatch<["p0", "p1"]>, + TypesMatchWith<"result type must match operand element type", + "p0", "result", + "::mlir::getElementTypeOrSelf($_self)"> + ]> { + let summary = "Return distance between two points"; + + let description = [{ + Result 
is the distance between p0 and p1, i.e., length(p0 - p1). + + The operands must all be a scalar or vector whose component type is floating-point. + + Result Type must be a scalar of the same type as the component type of the operands. + + #### Example: + + ```mlir + %2 = spirv.GL.Distance %0, %1 : vector<3xf32>, vector<3xf32> -> f32 + ``` + }]; + + let arguments = (ins + SPIRV_ScalarOrVectorOf:$p0, + SPIRV_ScalarOrVectorOf:$p1 + ); + + let results = (outs + SPIRV_Float:$result + ); + + let assemblyFormat = [{ + operands attr-dict `:` type($p0) `,` type($p1) `->` type($result) + }]; + + let hasVerifier = 0; +} + +// ----- + +def SPIRV_GLCrossOp : SPIRV_GLBinaryArithmeticOp<"Cross", 68, SPIRV_Float> { + let summary = "Return the cross product of two 3-component vectors"; + + let description = [{ + Result is the cross product of x and y, i.e., the resulting components are, in order: + + x[1] * y[2] - y[1] * x[2] + + x[2] * y[0] - y[2] * x[0] + + x[0] * y[1] - y[0] * x[1] + + All the operands must be vectors of 3 components of a floating-point type. + + Result Type and the type of all operands must be the same type. + + #### Example: + + ```mlir + %2 = spirv.GL.Cross %0, %1 : vector<3xf32> + %3 = spirv.GL.Cross %0, %1 : vector<3xf16> + ``` + }]; +} + +// ----- + +def SPIRV_GLNormalizeOp : SPIRV_GLUnaryArithmeticOp<"Normalize", 69, SPIRV_Float> { + let summary = "Normalizes a vector operand"; + + let description = [{ + Result is the vector in the same direction as x but with a length of 1. + + The operand x must be a scalar or vector whose component type is floating-point. + + Result Type and the type of x must be the same type. 
+ + #### Example: + + ```mlir + %2 = spirv.GL.Normalize %0 : vector<3xf32> + %3 = spirv.GL.Normalize %1 : vector<4xf16> + ``` + }]; +} + +// ----- + +def SPIRV_GLReflectOp : SPIRV_GLBinaryArithmeticOp<"Reflect", 71, SPIRV_Float> { + let summary = "Calculate reflection direction vector"; + + let description = [{ + For the incident vector I and surface orientation N, the result is the reflection direction: + + I - 2 * dot(N, I) * N + + N must already be normalized in order to achieve the desired result. + + The operands must all be a scalar or vector whose component type is floating-point. + + Result Type and the type of all operands must be the same type. + + #### Example: + + ```mlir + %2 = spirv.GL.Reflect %0, %1 : f32 + %3 = spirv.GL.Reflect %0, %1 : vector<3xf32> + ``` + }]; +} + +// ---- + def SPIRV_GLFindUMsbOp : SPIRV_GLUnaryArithmeticOp<"FindUMsb", 75, SPIRV_Int32> { let summary = "Unsigned-integer most-significant bit"; diff --git a/mlir/test/Dialect/SPIRV/IR/gl-ops.mlir b/mlir/test/Dialect/SPIRV/IR/gl-ops.mlir index 3683e5b469b17..beda3872bc8d2 100644 --- a/mlir/test/Dialect/SPIRV/IR/gl-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/gl-ops.mlir @@ -541,3 +541,125 @@ func.func @findumsb(%arg0 : i64) -> () { %2 = spirv.GL.FindUMsb %arg0 : i64 return } + +// ----- + +//===----------------------------------------------------------------------===// +// spirv.GL.Distance +//===----------------------------------------------------------------------===// + +func.func @distance_scalar(%arg0 : f32, %arg1 : f32) { + // CHECK: spirv.GL.Distance {{%.*}}, {{%.*}} : f32, f32 -> f32 + %0 = spirv.GL.Distance %arg0, %arg1 : f32, f32 -> f32 + return +} + +func.func @distance_vector(%arg0 : vector<3xf32>, %arg1 : vector<3xf32>) { + // CHECK: spirv.GL.Distance {{%.*}}, {{%.*}} : vector<3xf32>, vector<3xf32> -> f32 + %0 = spirv.GL.Distance %arg0, %arg1 : vector<3xf32>, vector<3xf32> -> f32 + return +} + +// ----- + +func.func @distance_invalid_type(%arg0 : i32, %arg1 : i32) { + // 
expected-error @+1 {{'spirv.GL.Distance' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16}} + %0 = spirv.GL.Distance %arg0, %arg1 : i32, i32 -> f32 + return +} + +// ----- + +func.func @distance_arg_mismatch(%arg0 : vector<3xf32>, %arg1 : vector<4xf32>) { + // expected-error @+1 {{'spirv.GL.Distance' op failed to verify that all of {p0, p1} have same type}} + %0 = spirv.GL.Distance %arg0, %arg1 : vector<3xf32>, vector<4xf32> -> f32 + return +} + +// ----- + +func.func @distance_invalid_vector_size(%arg0 : vector<5xf32>, %arg1 : vector<5xf32>) { + // expected-error @+1 {{'spirv.GL.Distance' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16}} + %0 = spirv.GL.Distance %arg0, %arg1 : vector<5xf32>, vector<5xf32> -> f32 + return +} + +// ----- + +func.func @distance_invalid_result(%arg0 : f32, %arg1 : f32) { + // expected-error @+1 {{'spirv.GL.Distance' op result #0 must be 16/32/64-bit float}} + %0 = spirv.GL.Distance %arg0, %arg1 : f32, f32 -> i32 + return +} + +// ----- + +//===----------------------------------------------------------------------===// +// spirv.GL.Cross +//===----------------------------------------------------------------------===// + +func.func @cross(%arg0 : vector<3xf32>, %arg1 : vector<3xf32>) { + %2 = spirv.GL.Cross %arg0, %arg1 : vector<3xf32> + // CHECK: %{{.+}} = spirv.GL.Cross %{{.+}}, %{{.+}} : vector<3xf32> + return +} + +// ----- + +func.func @cross_invalid_type(%arg0 : vector<3xi32>, %arg1 : vector<3xi32>) { + // expected-error @+1 {{'spirv.GL.Cross' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'vector<3xi32>'}} + %0 = spirv.GL.Cross %arg0, %arg1 : vector<3xi32> + return +} + +// ----- + +//===----------------------------------------------------------------------===// +// spirv.GL.Normalize +//===----------------------------------------------------------------------===// 
+ +func.func @normalize_scalar(%arg0 : f32) { + %2 = spirv.GL.Normalize %arg0 : f32 + // CHECK: %{{.+}} = spirv.GL.Normalize %{{.+}} : f32 + return +} + +func.func @normalize_vector(%arg0 : vector<3xf32>) { + %2 = spirv.GL.Normalize %arg0 : vector<3xf32> + // CHECK: %{{.+}} = spirv.GL.Normalize %{{.+}} : vector<3xf32> + return +} + +// ----- + +func.func @normalize_invalid_type(%arg0 : i32) { + // expected-error @+1 {{'spirv.GL.Normalize' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + %0 = spirv.GL.Normalize %arg0 : i32 + return +} + +// ----- + +//===----------------------------------------------------------------------===// +// spirv.GL.Reflect +//===----------------------------------------------------------------------===// + +func.func @reflect_scalar(%arg0 : f32, %arg1 : f32) { + %2 = spirv.GL.Reflect %arg0, %arg1 : f32 + // CHECK: %{{.+}} = spirv.GL.Reflect %{{.+}}, %{{.+}} : f32 + return +} + +func.func @reflect_vector(%arg0 : vector<3xf32>, %arg1 : vector<3xf32>) { + %2 = spirv.GL.Reflect %arg0, %arg1 : vector<3xf32> + // CHECK: %{{.+}} = spirv.GL.Reflect %{{.+}}, %{{.+}} : vector<3xf32> + return +} + +// ----- + +func.func @reflect_invalid_type(%arg0 : i32, %arg1 : i32) { + // expected-error @+1 {{'spirv.GL.Reflect' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + %0 = spirv.GL.Reflect %arg0, %arg1 : i32 + return +} diff --git a/mlir/test/Target/SPIRV/gl-ops.mlir b/mlir/test/Target/SPIRV/gl-ops.mlir index fff1adf0ae12c..119304cea7d4a 100644 --- a/mlir/test/Target/SPIRV/gl-ops.mlir +++ b/mlir/test/Target/SPIRV/gl-ops.mlir @@ -81,4 +81,24 @@ spirv.module Logical GLSL450 requires #spirv.vce { %2 = spirv.GL.FindUMsb %arg0 : i32 spirv.Return } + +spirv.func @vector(%arg0 : f32, %arg1 : vector<3xf32>, %arg2 : vector<3xf32>) "None" { + // CHECK: {{%.*}} = spirv.GL.Cross {{%.*}}, {{%.*}} : vector<3xf32> + %0 = spirv.GL.Cross %arg1, %arg2 : vector<3xf32> + // CHECK: {{%.*}} = spirv.GL.Normalize 
{{%.*}} : f32 + %1 = spirv.GL.Normalize %arg0 : f32 + // CHECK: {{%.*}} = spirv.GL.Normalize {{%.*}} : vector<3xf32> + %2 = spirv.GL.Normalize %arg1 : vector<3xf32> + // CHECK: {{%.*}} = spirv.GL.Reflect {{%.*}}, {{%.*}} : f32 + %3 = spirv.GL.Reflect %arg0, %arg0 : f32 + // CHECK: {{%.*}} = spirv.GL.Reflect {{%.*}}, {{%.*}} : vector<3xf32> + %4 = spirv.GL.Reflect %arg1, %arg2 : vector<3xf32> + // CHECK: {{%.*}} = spirv.GL.Distance {{%.*}}, {{%.*}} : f32, f32 -> f32 + %5 = spirv.GL.Distance %arg0, %arg0 : f32, f32 -> f32 + // CHECK: {{%.*}} = spirv.GL.Distance {{%.*}}, {{%.*}} : vector<3xf32>, vector<3xf32> -> f32 + %6 = spirv.GL.Distance %arg1, %arg2 : vector<3xf32>, vector<3xf32> -> f32 + spirv.Return + } + + } From e5a28a3b4d09a3ab128439a0f4eb2659e0b1978b Mon Sep 17 00:00:00 2001 From: mishaobu <46725114+mishaobu@users.noreply.github.com> Date: Fri, 17 Jan 2025 21:47:34 +0100 Subject: [PATCH 64/88] [mlir][spirv] Add MatrixTimesVector Op (#122302) (From SPIRV reference here : https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpMatrixTimesVector) --- .../mlir/Dialect/SPIRV/IR/SPIRVBase.td | 6 ++- .../mlir/Dialect/SPIRV/IR/SPIRVMatrixOps.td | 41 +++++++++++++++++++ mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp | 27 ++++++++++++ mlir/test/Dialect/SPIRV/IR/matrix-ops.mlir | 31 ++++++++++++++ mlir/test/Target/SPIRV/matrix.mlir | 7 ++++ 5 files changed, 111 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td index a4c01c0bc3418..469a9a0ef01dd 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td @@ -4171,6 +4171,7 @@ def SPIRV_IsArrayType : CPred<"::llvm::isa<::mlir::spirv::ArrayType>($_self)">; def SPIRV_IsCooperativeMatrixType : CPred<"::llvm::isa<::mlir::spirv::CooperativeMatrixType>($_self)">; def SPIRV_IsImageType : CPred<"::llvm::isa<::mlir::spirv::ImageType>($_self)">; +def SPIRV_IsVectorType : 
CPred<"::llvm::isa<::mlir::VectorType>($_self)">; def SPIRV_IsMatrixType : CPred<"::llvm::isa<::mlir::spirv::MatrixType>($_self)">; def SPIRV_IsPtrType : CPred<"::llvm::isa<::mlir::spirv::PointerType>($_self)">; def SPIRV_IsRTArrayType : CPred<"::llvm::isa<::mlir::spirv::RuntimeArrayType>($_self)">; @@ -4202,6 +4203,8 @@ def SPIRV_AnyCooperativeMatrix : DialectType; def SPIRV_AnyImage : DialectType; +def SPIRV_AnyVector : DialectType; def SPIRV_AnyMatrix : DialectType; def SPIRV_AnyRTArray : DialectType; def SPIRV_OC_OpFMod : I32EnumAttrCase<"OpFMod", 141>; def SPIRV_OC_OpVectorTimesScalar : I32EnumAttrCase<"OpVectorTimesScalar", 142>; def SPIRV_OC_OpMatrixTimesScalar : I32EnumAttrCase<"OpMatrixTimesScalar", 143>; +def SPIRV_OC_OpMatrixTimesVector : I32EnumAttrCase<"OpMatrixTimesVector", 145>; def SPIRV_OC_OpMatrixTimesMatrix : I32EnumAttrCase<"OpMatrixTimesMatrix", 146>; def SPIRV_OC_OpDot : I32EnumAttrCase<"OpDot", 148>; def SPIRV_OC_OpIAddCarry : I32EnumAttrCase<"OpIAddCarry", 149>; @@ -4553,7 +4557,7 @@ def SPIRV_OpcodeAttr : SPIRV_OC_OpFSub, SPIRV_OC_OpIMul, SPIRV_OC_OpFMul, SPIRV_OC_OpUDiv, SPIRV_OC_OpSDiv, SPIRV_OC_OpFDiv, SPIRV_OC_OpUMod, SPIRV_OC_OpSRem, SPIRV_OC_OpSMod, SPIRV_OC_OpFRem, SPIRV_OC_OpFMod, - SPIRV_OC_OpVectorTimesScalar, SPIRV_OC_OpMatrixTimesScalar, + SPIRV_OC_OpVectorTimesScalar, SPIRV_OC_OpMatrixTimesScalar, SPIRV_OC_OpMatrixTimesVector, SPIRV_OC_OpMatrixTimesMatrix, SPIRV_OC_OpDot, SPIRV_OC_OpIAddCarry, SPIRV_OC_OpISubBorrow, SPIRV_OC_OpUMulExtended, SPIRV_OC_OpSMulExtended, SPIRV_OC_OpIsNan, SPIRV_OC_OpIsInf, SPIRV_OC_OpOrdered, SPIRV_OC_OpUnordered, diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMatrixOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMatrixOps.td index a6f0f41429bcb..5bd99386e0085 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMatrixOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVMatrixOps.td @@ -114,6 +114,47 @@ def SPIRV_MatrixTimesScalarOp : SPIRV_Op< // ----- +def SPIRV_MatrixTimesVectorOp : 
SPIRV_Op<"MatrixTimesVector", [Pure]> { + let summary = "Linear-algebraic multiply of matrix X vector."; + + let description = [{ + Result Type must be a vector of floating-point type. + + Matrix must be an OpTypeMatrix whose Column Type is Result Type. + + Vector must be a vector with the same Component Type as the Component Type in Result Type. Its number of components must equal the number of columns in Matrix. + + #### Example: + + ```mlir + %0 = spirv.MatrixTimesVector %matrix, %vector : + !spirv.matrix<3 x vector<2xf32>>, vector<3xf32> -> vector<2xf32> + ``` + }]; + + let availability = [ + MinVersion, + MaxVersion, + Extension<[]>, + Capability<[SPIRV_C_Matrix]> + ]; + + let arguments = (ins + SPIRV_AnyMatrix:$matrix, + SPIRV_AnyVector:$vector + ); + + let results = (outs + SPIRV_AnyVector:$result + ); + + let assemblyFormat = [{ + operands attr-dict `:` type($matrix) `,` type($vector) `->` type($result) + }]; +} + +// ----- + def SPIRV_TransposeOp : SPIRV_Op<"Transpose", [Pure]> { let summary = "Transpose a matrix."; diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp index 26559c1321db5..040bf6a34cea7 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp @@ -1698,6 +1698,33 @@ LogicalResult spirv::TransposeOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// spirv.MatrixTimesVector +//===----------------------------------------------------------------------===// + +LogicalResult spirv::MatrixTimesVectorOp::verify() { + auto matrixType = llvm::cast(getMatrix().getType()); + auto vectorType = llvm::cast(getVector().getType()); + auto resultType = llvm::cast(getType()); + + if (matrixType.getNumColumns() != vectorType.getNumElements()) + return emitOpError("matrix columns (") + << matrixType.getNumColumns() << ") must match vector operand size (" + << vectorType.getNumElements() << ")"; + + if 
(resultType.getNumElements() != matrixType.getNumRows()) + return emitOpError("result size (") + << resultType.getNumElements() << ") must match the matrix rows (" + << matrixType.getNumRows() << ")"; + + auto matrixElementType = matrixType.getElementType(); + if (matrixElementType != vectorType.getElementType() || + matrixElementType != resultType.getElementType()) + return emitOpError("matrix, vector, and result element types must match"); + + return success(); +} + //===----------------------------------------------------------------------===// // spirv.MatrixTimesMatrix //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SPIRV/IR/matrix-ops.mlir b/mlir/test/Dialect/SPIRV/IR/matrix-ops.mlir index 372fcc6e514b9..37e7514d664ef 100644 --- a/mlir/test/Dialect/SPIRV/IR/matrix-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/matrix-ops.mlir @@ -29,6 +29,13 @@ spirv.module Logical GLSL450 requires #spirv.vce { spirv.ReturnValue %result : !spirv.matrix<3 x vector<3xf32>> } + // CHECK-LABEL: @matrix_times_vector_1 + spirv.func @matrix_times_vector_1(%arg0: !spirv.matrix<3 x vector<4xf32>>, %arg1: vector<3xf32>) -> vector<4xf32> "None" { + // CHECK: {{%.*}} = spirv.MatrixTimesVector {{%.*}}, {{%.*}} : !spirv.matrix<3 x vector<4xf32>>, vector<3xf32> -> vector<4xf32> + %result = spirv.MatrixTimesVector %arg0, %arg1 : !spirv.matrix<3 x vector<4xf32>>, vector<3xf32> -> vector<4xf32> + spirv.ReturnValue %result : vector<4xf32> + } + // CHECK-LABEL: @matrix_times_matrix_1 spirv.func @matrix_times_matrix_1(%arg0: !spirv.matrix<3 x vector<3xf32>>, %arg1: !spirv.matrix<3 x vector<3xf32>>) -> !spirv.matrix<3 x vector<3xf32>> "None"{ // CHECK: {{%.*}} = spirv.MatrixTimesMatrix {{%.*}}, {{%.*}} : !spirv.matrix<3 x vector<3xf32>>, !spirv.matrix<3 x vector<3xf32>> -> !spirv.matrix<3 x vector<3xf32>> @@ -124,3 +131,27 @@ func.func @matrix_times_matrix_component_type_mismatch_2(%arg0 : !spirv.matrix<3 %result = spirv.MatrixTimesMatrix 
%arg0, %arg1 : !spirv.matrix<3 x vector<3xf64>>, !spirv.matrix<3 x vector<3xf32>> -> !spirv.matrix<3 x vector<3xf32>> return } + +// ----- + +func.func @matrix_times_vector_element_type_mismatch(%arg0: !spirv.matrix<4 x vector<3xf32>>, %arg1: vector<4xf16>) { + // expected-error @+1 {{matrix, vector, and result element types must match}} + %result = spirv.MatrixTimesVector %arg0, %arg1 : !spirv.matrix<4 x vector<3xf32>>, vector<4xf16> -> vector<3xf32> + return +} + +// ----- + +func.func @matrix_times_vector_row_mismatch(%arg0: !spirv.matrix<4 x vector<3xf32>>, %arg1: vector<4xf32>) { + // expected-error @+1 {{spirv.MatrixTimesVector' op result size (4) must match the matrix rows (3)}} + %result = spirv.MatrixTimesVector %arg0, %arg1 : !spirv.matrix<4 x vector<3xf32>>, vector<4xf32> -> vector<4xf32> + return +} + +// ----- + +func.func @matrix_times_vector_column_mismatch(%arg0: !spirv.matrix<4 x vector<3xf32>>, %arg1: vector<3xf32>) { + // expected-error @+1 {{spirv.MatrixTimesVector' op matrix columns (4) must match vector operand size (3)}} + %result = spirv.MatrixTimesVector %arg0, %arg1 : !spirv.matrix<4 x vector<3xf32>>, vector<3xf32> -> vector<3xf32> + return +} diff --git a/mlir/test/Target/SPIRV/matrix.mlir b/mlir/test/Target/SPIRV/matrix.mlir index 2a391df4bff39..0ec1dc27e4e93 100644 --- a/mlir/test/Target/SPIRV/matrix.mlir +++ b/mlir/test/Target/SPIRV/matrix.mlir @@ -36,6 +36,13 @@ spirv.module Logical GLSL450 requires #spirv.vce { spirv.ReturnValue %result : !spirv.matrix<2 x vector<3xf32>> } + // CHECK-LABEL: @matrix_times_vector_1 + spirv.func @matrix_times_vector_1(%arg0: !spirv.matrix<3 x vector<4xf32>>, %arg1: vector<3xf32>) -> vector<4xf32> "None" { + // CHECK: {{%.*}} = spirv.MatrixTimesVector {{%.*}}, {{%.*}} : !spirv.matrix<3 x vector<4xf32>>, vector<3xf32> -> vector<4xf32> + %result = spirv.MatrixTimesVector %arg0, %arg1 : !spirv.matrix<3 x vector<4xf32>>, vector<3xf32> -> vector<4xf32> + spirv.ReturnValue %result : vector<4xf32> + } + // 
CHECK-LABEL: @matrix_times_matrix_1 spirv.func @matrix_times_matrix_1(%arg0: !spirv.matrix<3 x vector<3xf32>>, %arg1: !spirv.matrix<3 x vector<3xf32>>) -> !spirv.matrix<3 x vector<3xf32>> "None"{ // CHECK: {{%.*}} = spirv.MatrixTimesMatrix {{%.*}}, {{%.*}} : !spirv.matrix<3 x vector<3xf32>>, !spirv.matrix<3 x vector<3xf32>> -> !spirv.matrix<3 x vector<3xf32>> From b1bf95c081297305b2c9810ff04a9dac7216b434 Mon Sep 17 00:00:00 2001 From: George Chaltas Date: Fri, 17 Jan 2025 13:56:58 -0700 Subject: [PATCH 65/88] ReduxWidth check for 0 (#123257) Added assert to check for underflow of ReduxWidth modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Source code analysis flagged the operation (ReduxWidth - 1) as potential underflow, since ReduxWidth is unsigned. Realize that this should never happen if everything is working right, but added an assert to check for it just in case. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0bf01a8c680bf..40dd0d4cc4ef6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -20095,6 +20095,7 @@ class HorizontalReduction { NumRegs = TTI.getNumberOfRegisters(TTI.getRegisterClassForType(true, Tp)); while (NumParts > NumRegs) { + assert(ReduxWidth > 0 && "ReduxWidth is unexpectedly 0."); ReduxWidth = bit_floor(ReduxWidth - 1); VectorType *Tp = getWidenedType(ScalarTy, ReduxWidth); NumParts = TTI.getNumberOfParts(Tp); From 078dfd825309480b6a9df3ea44f6adf04913b339 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 17 Jan 2025 12:53:35 -0800 Subject: [PATCH 66/88] [RISCV] Fix variable naming in recently pre-committed test I'd swapped the %add/%sub names, and then copied that repeatedly. Oops. While updating, remove the nsw as those should be irrelevant and is another copy-paste mistake on my part. 
--- .../RISCV/rvv/fixed-vectors-select-addsub.ll | 120 +++++++++--------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll index 008c39df70839..ee9609992c049 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll @@ -13,9 +13,9 @@ define <1 x i32> @select_addsub_v1i32(<1 x i1> %cc, <1 x i32> %a, <1 x i32> %b) ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <1 x i32> %a, %b - %sub = add nsw <1 x i32> %a, %b - %res = select <1 x i1> %cc, <1 x i32> %add, <1 x i32> %sub + %sub = sub <1 x i32> %a, %b + %add = add <1 x i32> %a, %b + %res = select <1 x i1> %cc, <1 x i32> %sub, <1 x i32> %add ret <1 x i32> %res } @@ -27,9 +27,9 @@ define <2 x i32> @select_addsub_v2i32(<2 x i1> %cc, <2 x i32> %a, <2 x i32> %b) ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <2 x i32> %a, %b - %sub = add nsw <2 x i32> %a, %b - %res = select <2 x i1> %cc, <2 x i32> %add, <2 x i32> %sub + %sub = sub <2 x i32> %a, %b + %add = add <2 x i32> %a, %b + %res = select <2 x i1> %cc, <2 x i32> %sub, <2 x i32> %add ret <2 x i32> %res } @@ -41,9 +41,9 @@ define <4 x i32> @select_addsub_v4i32(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <4 x i32> %a, %b - %sub = add nsw <4 x i32> %a, %b - %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub + %sub = sub <4 x i32> %a, %b + %add = add <4 x i32> %a, %b + %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add ret <4 x i32> %res } @@ -55,9 +55,9 @@ define <4 x i32> @select_addsub_v4i32_select_swapped(<4 x i1> %cc, <4 x i32> %a, ; CHECK-NEXT: vadd.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %add = sub 
nsw <4 x i32> %a, %b - %sub = add nsw <4 x i32> %a, %b - %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add + %sub = sub <4 x i32> %a, %b + %add = add <4 x i32> %a, %b + %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub ret <4 x i32> %res } @@ -69,9 +69,9 @@ define <4 x i32> @select_addsub_v4i32_add_swapped(<4 x i1> %cc, <4 x i32> %a, <4 ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <4 x i32> %a, %b - %sub = add nsw <4 x i32> %b, %a - %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub + %sub = sub <4 x i32> %a, %b + %add = add <4 x i32> %b, %a + %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add ret <4 x i32> %res } @@ -83,9 +83,9 @@ define <4 x i32> @select_addsub_v4i32_both_swapped(<4 x i1> %cc, <4 x i32> %a, < ; CHECK-NEXT: vadd.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <4 x i32> %a, %b - %sub = add nsw <4 x i32> %b, %a - %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add + %sub = sub <4 x i32> %a, %b + %add = add <4 x i32> %b, %a + %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub ret <4 x i32> %res } @@ -97,9 +97,9 @@ define <4 x i32> @select_addsub_v4i32_sub_swapped(<4 x i1> %cc, <4 x i32> %a, <4 ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <4 x i32> %a, %b - %sub = add nsw <4 x i32> %b, %a - %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub + %sub = sub <4 x i32> %a, %b + %add = add <4 x i32> %b, %a + %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add ret <4 x i32> %res } @@ -111,9 +111,9 @@ define <8 x i32> @select_addsub_v8i32(<8 x i1> %cc, <8 x i32> %a, <8 x i32> %b) ; CHECK-NEXT: vsub.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %add = sub nsw <8 x i32> %a, %b - %sub = add nsw <8 x i32> %a, %b - %res = select <8 x i1> %cc, <8 x i32> %add, <8 x i32> %sub + %sub = sub <8 x i32> %a, %b + %add = add <8 x 
i32> %a, %b + %res = select <8 x i1> %cc, <8 x i32> %sub, <8 x i32> %add ret <8 x i32> %res } @@ -125,9 +125,9 @@ define <16 x i32> @select_addsub_v16i32(<16 x i1> %cc, <16 x i32> %a, <16 x i32> ; CHECK-NEXT: vsub.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %add = sub nsw <16 x i32> %a, %b - %sub = add nsw <16 x i32> %a, %b - %res = select <16 x i1> %cc, <16 x i32> %add, <16 x i32> %sub + %sub = sub <16 x i32> %a, %b + %add = add <16 x i32> %a, %b + %res = select <16 x i1> %cc, <16 x i32> %sub, <16 x i32> %add ret <16 x i32> %res } @@ -140,9 +140,9 @@ define <32 x i32> @select_addsub_v32i32(<32 x i1> %cc, <32 x i32> %a, <32 x i32> ; CHECK-NEXT: vsub.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret - %add = sub nsw <32 x i32> %a, %b - %sub = add nsw <32 x i32> %a, %b - %res = select <32 x i1> %cc, <32 x i32> %add, <32 x i32> %sub + %sub = sub <32 x i32> %a, %b + %add = add <32 x i32> %a, %b + %res = select <32 x i1> %cc, <32 x i32> %sub, <32 x i32> %add ret <32 x i32> %res } @@ -214,9 +214,9 @@ define <64 x i32> @select_addsub_v64i32(<64 x i1> %cc, <64 x i32> %a, <64 x i32> ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret - %add = sub nsw <64 x i32> %a, %b - %sub = add nsw <64 x i32> %a, %b - %res = select <64 x i1> %cc, <64 x i32> %add, <64 x i32> %sub + %sub = sub <64 x i32> %a, %b + %add = add <64 x i32> %a, %b + %res = select <64 x i1> %cc, <64 x i32> %sub, <64 x i32> %add ret <64 x i32> %res } @@ -228,9 +228,9 @@ define <8 x i64> @select_addsub_v8i64(<8 x i1> %cc, <8 x i64> %a, <8 x i64> %b) ; CHECK-NEXT: vsub.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %add = sub nsw <8 x i64> %a, %b - %sub = add nsw <8 x i64> %a, %b - %res = select <8 x i1> %cc, <8 x i64> %add, <8 x i64> %sub + %sub = sub <8 x i64> %a, %b + %add = add <8 x i64> %a, %b + %res = select <8 x i1> %cc, <8 x i64> %sub, <8 x i64> %add ret <8 x i64> %res } @@ -242,9 +242,9 @@ define <8 x 
i16> @select_addsub_v8i16(<8 x i1> %cc, <8 x i16> %a, <8 x i16> %b) ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <8 x i16> %a, %b - %sub = add nsw <8 x i16> %a, %b - %res = select <8 x i1> %cc, <8 x i16> %add, <8 x i16> %sub + %sub = sub <8 x i16> %a, %b + %add = add <8 x i16> %a, %b + %res = select <8 x i1> %cc, <8 x i16> %sub, <8 x i16> %add ret <8 x i16> %res } @@ -256,9 +256,9 @@ define <8 x i8> @select_addsub_v8i8(<8 x i1> %cc, <8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <8 x i8> %a, %b - %sub = add nsw <8 x i8> %a, %b - %res = select <8 x i1> %cc, <8 x i8> %add, <8 x i8> %sub + %sub = sub <8 x i8> %a, %b + %add = add <8 x i8> %a, %b + %res = select <8 x i1> %cc, <8 x i8> %sub, <8 x i8> %add ret <8 x i8> %res } @@ -268,9 +268,9 @@ define <8 x i1> @select_addsub_v8i1(<8 x i1> %cc, <8 x i1> %a, <8 x i1> %b) { ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmxor.mm v0, v8, v9 ; CHECK-NEXT: ret - %add = sub nsw <8 x i1> %a, %b - %sub = add nsw <8 x i1> %a, %b - %res = select <8 x i1> %cc, <8 x i1> %add, <8 x i1> %sub + %sub = sub <8 x i1> %a, %b + %add = add <8 x i1> %a, %b + %res = select <8 x i1> %cc, <8 x i1> %sub, <8 x i1> %add ret <8 x i1> %res } @@ -282,9 +282,9 @@ define <8 x i2> @select_addsub_v8i2(<8 x i1> %cc, <8 x i2> %a, <8 x i2> %b) { ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <8 x i2> %a, %b - %sub = add nsw <8 x i2> %a, %b - %res = select <8 x i1> %cc, <8 x i2> %add, <8 x i2> %sub + %sub = sub <8 x i2> %a, %b + %add = add <8 x i2> %a, %b + %res = select <8 x i1> %cc, <8 x i2> %sub, <8 x i2> %add ret <8 x i2> %res } @@ -297,9 +297,9 @@ define <4 x i32> @select_addsub_v4i32_constmask(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <4 x i32> %a, 
%b - %sub = add nsw <4 x i32> %a, %b - %res = select <4 x i1> , <4 x i32> %add, <4 x i32> %sub + %sub = sub <4 x i32> %a, %b + %add = add <4 x i32> %a, %b + %res = select <4 x i1> , <4 x i32> %sub, <4 x i32> %add ret <4 x i32> %res } @@ -312,9 +312,9 @@ define <4 x i32> @select_addsub_v4i32_constmask2(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <4 x i32> %a, %b - %sub = add nsw <4 x i32> %b, %a - %res = select <4 x i1> , <4 x i32> %add, <4 x i32> %sub + %sub = sub <4 x i32> %a, %b + %add = add <4 x i32> %b, %a + %res = select <4 x i1> , <4 x i32> %sub, <4 x i32> %add ret <4 x i32> %res } @@ -328,9 +328,9 @@ define <4 x i32> @select_addsub_v4i32_as_shuffle(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <4 x i32> %a, %b - %sub = add nsw <4 x i32> %a, %b - %res = shufflevector <4 x i32> %add, <4 x i32> %sub, <4 x i32> + %sub = sub <4 x i32> %a, %b + %add = add <4 x i32> %a, %b + %res = shufflevector <4 x i32> %sub, <4 x i32> %add, <4 x i32> ret <4 x i32> %res } @@ -344,8 +344,8 @@ define <4 x i32> @select_addsub_v4i32_as_shuffle2(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vsub.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %add = sub nsw <4 x i32> %b, %a - %sub = add nsw <4 x i32> %a, %b - %res = shufflevector <4 x i32> %add, <4 x i32> %sub, <4 x i32> + %sub = sub <4 x i32> %b, %a + %add = add <4 x i32> %a, %b + %res = shufflevector <4 x i32> %sub, <4 x i32> %add, <4 x i32> ret <4 x i32> %res } From 8b0c774f8afa323637a89dc455d9dd054cd5bffa Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Fri, 17 Jan 2025 13:04:28 -0800 Subject: [PATCH 67/88] [lld][InstrProf][NFC] Fix typo in help message (#123390) --- lld/MachO/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 39191af7dc169..4c89f96c3ebaa 
100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -133,7 +133,7 @@ def print_symbol_order_eq: Joined<["--"], "print-symbol-order=">, def irpgo_profile: Separate<["--"], "irpgo-profile">, Group; def irpgo_profile_eq: Joined<["--"], "irpgo-profile=">, Alias(irpgo_profile)>, MetaVarName<"">, - HelpText<"Read the IRPGO for use with -bp-startup-sort and other profile-guided optimizations">, + HelpText<"Read the IRPGO for use with --bp-startup-sort and other profile-guided optimizations">, Group; def bp_startup_sort: Joined<["--"], "bp-startup-sort=">, MetaVarName<"[none,function]">, From 5db28679da38bee65feb55b803a23aceee568f44 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Fri, 17 Jan 2025 12:54:41 -0800 Subject: [PATCH 68/88] [libc] Fix malloc riscv32 test failures from #117815 --- libc/test/src/__support/block_test.cpp | 7 +++++-- libc/test/src/__support/freestore_test.cpp | 4 ++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/libc/test/src/__support/block_test.cpp b/libc/test/src/__support/block_test.cpp index c2d9833fb9439..904ac5c66994d 100644 --- a/libc/test/src/__support/block_test.cpp +++ b/libc/test/src/__support/block_test.cpp @@ -75,8 +75,11 @@ TEST(LlvmLibcBlockTest, CannotCreateTooSmallBlock) { TEST(LlvmLibcBlockTest, CanSplitBlock) { constexpr size_t kN = 1024; - // Give the split position a large alignment. - constexpr size_t kSplitN = 512 + Block::PREV_FIELD_SIZE; + + // Choose a split position such that the next block's usable space is 512 + // bytes from this one's. This should be sufficient for any machine's + // alignment. 
+ const size_t kSplitN = Block::inner_size(512); array bytes; auto result = Block::init(bytes); diff --git a/libc/test/src/__support/freestore_test.cpp b/libc/test/src/__support/freestore_test.cpp index a32badb39b1e6..468f0033aedc1 100644 --- a/libc/test/src/__support/freestore_test.cpp +++ b/libc/test/src/__support/freestore_test.cpp @@ -26,6 +26,10 @@ TEST(LlvmLibcFreeStore, TooSmall) { Block *too_small = *maybeBlock; maybeBlock = too_small->split(Block::PREV_FIELD_SIZE); ASSERT_TRUE(maybeBlock.has_value()); + // On platforms with high alignment the smallest legal block may be large + // enough for a node. + if (too_small->outer_size() > sizeof(Block) + sizeof(FreeList::Node)) + return; Block *remainder = *maybeBlock; FreeStore store; From a440c3ea89ea25a88ec265fe6130a6eb04840423 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Fri, 17 Jan 2025 13:14:03 -0800 Subject: [PATCH 69/88] [libc] Correct previous malloc fix --- libc/test/src/__support/freestore_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/test/src/__support/freestore_test.cpp b/libc/test/src/__support/freestore_test.cpp index 468f0033aedc1..5d49f47a5fcdf 100644 --- a/libc/test/src/__support/freestore_test.cpp +++ b/libc/test/src/__support/freestore_test.cpp @@ -28,7 +28,7 @@ TEST(LlvmLibcFreeStore, TooSmall) { ASSERT_TRUE(maybeBlock.has_value()); // On platforms with high alignment the smallest legal block may be large // enough for a node. 
- if (too_small->outer_size() > sizeof(Block) + sizeof(FreeList::Node)) + if (too_small->inner_size() >= sizeof(Block) + sizeof(FreeList::Node)) return; Block *remainder = *maybeBlock; From 98067a322596a5fd1d850b2645250a082e8b18f2 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Fri, 17 Jan 2025 13:15:53 -0800 Subject: [PATCH 70/88] [libc] Outer size, not inner size --- libc/test/src/__support/freestore_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/test/src/__support/freestore_test.cpp b/libc/test/src/__support/freestore_test.cpp index 5d49f47a5fcdf..39292b6a1211b 100644 --- a/libc/test/src/__support/freestore_test.cpp +++ b/libc/test/src/__support/freestore_test.cpp @@ -28,7 +28,7 @@ TEST(LlvmLibcFreeStore, TooSmall) { ASSERT_TRUE(maybeBlock.has_value()); // On platforms with high alignment the smallest legal block may be large // enough for a node. - if (too_small->inner_size() >= sizeof(Block) + sizeof(FreeList::Node)) + if (too_small->outer_size() >= sizeof(Block) + sizeof(FreeList::Node)) return; Block *remainder = *maybeBlock; From 8f18f36b4906872ee0838ade2c0367c77b6f5bc0 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Fri, 17 Jan 2025 13:26:21 -0800 Subject: [PATCH 71/88] [lldb] Skip unreliable test under ASAN --- lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py index f4f30b6677e53..580ad38ab51c1 100644 --- a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py +++ b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py @@ -672,6 +672,7 @@ def test_indexedVariables_with_raw_child_for_synthetics(self): self.do_test_indexedVariables(enableSyntheticChildDebugging=True) @skipIfWindows + @skipIfAsan # FIXME this fails with a non-asan issue on green dragon. 
def test_registers(self): """ Test that registers whose byte size is the size of a pointer on From 04383d63130a72c1280d80ec3f5a09dfdf607462 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Fri, 17 Jan 2025 13:31:56 -0800 Subject: [PATCH 72/88] [Static analysis] Encodes a filename before inserting it into a URL. (#120810) This fixes a bug where report links generated from files such as StylePrimitiveNumericTypes+Conversions.h in WebKit result in an error. --------- Co-authored-by: Brianna Fan --- clang/tools/scan-build/bin/scan-build | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/clang/tools/scan-build/bin/scan-build b/clang/tools/scan-build/bin/scan-build index 37241c6d85c5b..b90e635d31757 100755 --- a/clang/tools/scan-build/bin/scan-build +++ b/clang/tools/scan-build/bin/scan-build @@ -820,7 +820,8 @@ ENDTEXT } # Emit the "View" link. - print OUT "View Report"; + my $EncodedReport = URLEscape($ReportFile); + print OUT "View Report"; # Emit REPORTBUG markers. print OUT "\n\n"; @@ -1465,6 +1466,16 @@ sub HtmlEscape { return $tmp; } +##----------------------------------------------------------------------------## +# URLEscape - encode characters that are special in URLs +##----------------------------------------------------------------------------## + +sub URLEscape { + my $arg = shift || ''; + $arg =~ s/\+/%2B/g; + return $arg; +} + ##----------------------------------------------------------------------------## # ShellEscape - backslash escape characters that are special to the shell ##----------------------------------------------------------------------------## From d6315afff078cb4309b5614562b32520f6e3a2eb Mon Sep 17 00:00:00 2001 From: vporpo Date: Fri, 17 Jan 2025 13:36:42 -0800 Subject: [PATCH 73/88] [SandboxVec][InstrMaps] EraseInstr callback (#123256) This patch hooks up InstrMaps to the Sandbox IR callbacks such that it gets updated when instructions get erased. 
--- .../Vectorize/SandboxVectorizer/InstrMaps.h | 32 +++++++++++++++++++ .../SandboxVectorizer/Passes/BottomUpVec.h | 2 +- .../SandboxVectorizer/Passes/BottomUpVec.cpp | 6 ++-- .../SandboxVectorizer/InstrMapsTest.cpp | 11 ++++++- .../SandboxVectorizer/LegalityTest.cpp | 6 ++-- 5 files changed, 49 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h index 586de53f3a724..c931319d3b002 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h @@ -13,9 +13,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/SandboxIR/Context.h" +#include "llvm/SandboxIR/Instruction.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include namespace llvm::sandboxir { @@ -30,8 +33,37 @@ class InstrMaps { /// with the same lane, as they may be coming from vectorizing different /// original values. DenseMap> VectorToOrigLaneMap; + Context &Ctx; + std::optional EraseInstrCB; + +private: + void notifyEraseInstr(Value *V) { + // We don't know if V is an original or a vector value. + auto It = OrigToVectorMap.find(V); + if (It != OrigToVectorMap.end()) { + // V is an original value. + // Remove it from VectorToOrigLaneMap. + Value *Vec = It->second; + VectorToOrigLaneMap[Vec].erase(V); + // Now erase V from OrigToVectorMap. + OrigToVectorMap.erase(It); + } else { + // V is a vector value. + // Go over the original values it came from and remove them from + // OrigToVectorMap. + for (auto [Orig, Lane] : VectorToOrigLaneMap[V]) + OrigToVectorMap.erase(Orig); + // Now erase V from VectorToOrigLaneMap. 
+ VectorToOrigLaneMap.erase(V); + } + } public: + InstrMaps(Context &Ctx) : Ctx(Ctx) { + EraseInstrCB = Ctx.registerEraseInstrCallback( + [this](Instruction *I) { notifyEraseInstr(I); }); + } + ~InstrMaps() { Ctx.unregisterEraseInstrCallback(*EraseInstrCB); } /// \Returns the vector value that we got from vectorizing \p Orig, or /// nullptr if not found. Value *getVectorForOrig(Value *Orig) const { diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h index 69cea3c4c7b53..dd3012f7c9b55 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h @@ -28,7 +28,7 @@ class BottomUpVec final : public FunctionPass { std::unique_ptr Legality; DenseSet DeadInstrCandidates; /// Maps scalars to vectors. - InstrMaps IMaps; + std::unique_ptr IMaps; /// Creates and returns a vector instruction that replaces the instructions in /// \p Bndl. \p Operands are the already vectorized operands. 
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index 6b2032be53560..b8e2697839a3c 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -161,7 +161,7 @@ Value *BottomUpVec::createVectorInstr(ArrayRef Bndl, auto *VecI = CreateVectorInstr(Bndl, Operands); if (VecI != nullptr) { Change = true; - IMaps.registerVector(Bndl, VecI); + IMaps->registerVector(Bndl, VecI); } return VecI; } @@ -315,10 +315,10 @@ bool BottomUpVec::tryVectorize(ArrayRef Bndl) { } bool BottomUpVec::runOnFunction(Function &F, const Analyses &A) { - IMaps.clear(); + IMaps = std::make_unique(F.getContext()); Legality = std::make_unique( A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(), - F.getContext(), IMaps); + F.getContext(), *IMaps); Change = false; const auto &DL = F.getParent()->getDataLayout(); unsigned VecRegBits = diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp index 1c8ef7e351aba..37ce7c962f1d9 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp @@ -53,7 +53,7 @@ define void @foo(i8 %v0, i8 %v1, i8 %v2, i8 %v3, <2 x i8> %vec) { auto *VAdd0 = cast(&*It++); [[maybe_unused]] auto *Ret = cast(&*It++); - sandboxir::InstrMaps IMaps; + sandboxir::InstrMaps IMaps(Ctx); // Check with empty IMaps. EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr); EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr); @@ -75,4 +75,13 @@ define void @foo(i8 %v0, i8 %v1, i8 %v2, i8 %v3, <2 x i8> %vec) { #ifndef NDEBUG EXPECT_DEATH(IMaps.registerVector({Add1, Add0}, VAdd0), ".*exists.*"); #endif // NDEBUG + // Check callbacks: erase original instr. 
+ Add0->eraseFromParent(); + EXPECT_FALSE(IMaps.getOrigLane(VAdd0, Add0)); + EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add1), 1); + EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr); + // Check callbacks: erase vector instr. + VAdd0->eraseFromParent(); + EXPECT_FALSE(IMaps.getOrigLane(VAdd0, Add1)); + EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr); } diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 2e90462a633c1..069bfdba0a7cd 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -111,7 +111,7 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float auto *CmpSLT = cast(&*It++); auto *CmpSGT = cast(&*It++); - llvm::sandboxir::InstrMaps IMaps; + llvm::sandboxir::InstrMaps IMaps(Ctx); sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps); const auto &Result = Legality.canVectorize({St0, St1}, /*SkipScheduling=*/true); @@ -230,7 +230,7 @@ define void @foo(ptr %ptr) { auto *St0 = cast(&*It++); auto *St1 = cast(&*It++); - llvm::sandboxir::InstrMaps IMaps; + llvm::sandboxir::InstrMaps IMaps(Ctx); sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps); { // Can vectorize St0,St1. 
@@ -266,7 +266,7 @@ define void @foo() { }; sandboxir::Context Ctx(C); - llvm::sandboxir::InstrMaps IMaps; + llvm::sandboxir::InstrMaps IMaps(Ctx); sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps); EXPECT_TRUE( Matches(Legality.createLegalityResult(), "Widen")); From ae932becb2c952876edbb3591bfa997bf4629a4d Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Fri, 17 Jan 2025 17:01:00 -0500 Subject: [PATCH 74/88] [clang][Sema] Upstream HeuristicResolver from clangd (#121314) Fixes https://github.com/llvm/llvm-project/issues/121310 --- clang-tools-extra/clangd/CMakeLists.txt | 1 - clang-tools-extra/clangd/FindTarget.cpp | 2 +- clang-tools-extra/clangd/FindTarget.h | 4 +++- clang-tools-extra/clangd/InlayHints.cpp | 2 +- clang-tools-extra/clangd/ParsedAST.cpp | 2 +- clang-tools-extra/clangd/ParsedAST.h | 2 +- clang-tools-extra/clangd/SemanticHighlighting.cpp | 2 +- clang-tools-extra/clangd/XRefs.cpp | 2 +- clang-tools-extra/clangd/unittests/CMakeLists.txt | 1 - .../include/clang/Sema}/HeuristicResolver.h | 7 ++----- clang/lib/Sema/CMakeLists.txt | 1 + .../clangd => clang/lib/Sema}/HeuristicResolver.cpp | 4 +--- clang/unittests/Sema/CMakeLists.txt | 1 + .../unittests/Sema/HeuristicResolverTest.cpp | 3 +-- 14 files changed, 15 insertions(+), 19 deletions(-) rename {clang-tools-extra/clangd => clang/include/clang/Sema}/HeuristicResolver.h (95%) rename {clang-tools-extra/clangd => clang/lib/Sema}/HeuristicResolver.cpp (99%) rename clang-tools-extra/clangd/unittests/HeuristicResolverTests.cpp => clang/unittests/Sema/HeuristicResolverTest.cpp (99%) diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index d797ddce8c44d..6f10afe4a5625 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -91,7 +91,6 @@ add_clang_library(clangDaemon STATIC GlobalCompilationDatabase.cpp Headers.cpp HeaderSourceSwitch.cpp - HeuristicResolver.cpp Hover.cpp IncludeCleaner.cpp IncludeFixer.cpp 
diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index e702c6b3537a0..04fd6d437b7bd 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -8,7 +8,6 @@ #include "FindTarget.h" #include "AST.h" -#include "HeuristicResolver.h" #include "support/Logger.h" #include "clang/AST/ASTConcept.h" #include "clang/AST/ASTTypeTraits.h" @@ -35,6 +34,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" +#include "clang/Sema/HeuristicResolver.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" diff --git a/clang-tools-extra/clangd/FindTarget.h b/clang-tools-extra/clangd/FindTarget.h index b41c547095100..a7706804ce7ec 100644 --- a/clang-tools-extra/clangd/FindTarget.h +++ b/clang-tools-extra/clangd/FindTarget.h @@ -33,9 +33,11 @@ #include namespace clang { -namespace clangd { + class HeuristicResolver; +namespace clangd { + /// Describes the link between an AST node and a Decl it refers to. enum class DeclRelation : unsigned; /// A bitfield of DeclRelations. 
diff --git a/clang-tools-extra/clangd/InlayHints.cpp b/clang-tools-extra/clangd/InlayHints.cpp index fefffeb4efc1a..1b1bcf78c9855 100644 --- a/clang-tools-extra/clangd/InlayHints.cpp +++ b/clang-tools-extra/clangd/InlayHints.cpp @@ -9,7 +9,6 @@ #include "../clang-tidy/utils/DesignatedInitializers.h" #include "AST.h" #include "Config.h" -#include "HeuristicResolver.h" #include "ParsedAST.h" #include "Protocol.h" #include "SourceCode.h" @@ -27,6 +26,7 @@ #include "clang/Basic/OperatorKinds.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Sema/HeuristicResolver.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp index 725cbeb154cb8..89d6f26d0f150 100644 --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -20,7 +20,6 @@ #include "Feature.h" #include "FeatureModule.h" #include "Headers.h" -#include "HeuristicResolver.h" #include "IncludeCleaner.h" #include "IncludeFixer.h" #include "Preamble.h" @@ -53,6 +52,7 @@ #include "clang/Lex/Lexer.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Sema/HeuristicResolver.h" #include "clang/Serialization/ASTWriter.h" #include "clang/Tooling/CompilationDatabase.h" #include "clang/Tooling/Core/Diagnostic.h" diff --git a/clang-tools-extra/clangd/ParsedAST.h b/clang-tools-extra/clangd/ParsedAST.h index 8d9d1e6456926..82fac96360488 100644 --- a/clang-tools-extra/clangd/ParsedAST.h +++ b/clang-tools-extra/clangd/ParsedAST.h @@ -38,9 +38,9 @@ #include namespace clang { +class HeuristicResolver; class Sema; namespace clangd { -class HeuristicResolver; /// Stores and provides access to parsed AST. 
class ParsedAST { diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp index e6d16af2495fe..86ca05644c703 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.cpp +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp @@ -9,7 +9,6 @@ #include "SemanticHighlighting.h" #include "Config.h" #include "FindTarget.h" -#include "HeuristicResolver.h" #include "ParsedAST.h" #include "Protocol.h" #include "SourceCode.h" @@ -27,6 +26,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Sema/HeuristicResolver.h" #include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 8d5ab2e491a40..0a093108b752c 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -10,7 +10,6 @@ #include "FindSymbols.h" #include "FindTarget.h" #include "Headers.h" -#include "HeuristicResolver.h" #include "IncludeCleaner.h" #include "ParsedAST.h" #include "Protocol.h" @@ -53,6 +52,7 @@ #include "clang/Index/IndexingOptions.h" #include "clang/Index/USRGeneration.h" #include "clang/Lex/Lexer.h" +#include "clang/Sema/HeuristicResolver.h" #include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt index 8dba8088908d5..dffdcd5d014ca 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -64,7 +64,6 @@ add_unittest(ClangdUnitTests ClangdTests GlobalCompilationDatabaseTests.cpp HeadersTests.cpp HeaderSourceSwitchTests.cpp - HeuristicResolverTests.cpp HoverTests.cpp IncludeCleanerTests.cpp IndexActionTests.cpp diff --git a/clang-tools-extra/clangd/HeuristicResolver.h 
b/clang/include/clang/Sema/HeuristicResolver.h similarity index 95% rename from clang-tools-extra/clangd/HeuristicResolver.h rename to clang/include/clang/Sema/HeuristicResolver.h index c130e0677e86d..947de7a4e83ce 100644 --- a/clang-tools-extra/clangd/HeuristicResolver.h +++ b/clang/include/clang/Sema/HeuristicResolver.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEURISTICRESOLVER_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEURISTICRESOLVER_H +#ifndef LLVM_CLANG_SEMA_HEURISTICRESOLVER_H +#define LLVM_CLANG_SEMA_HEURISTICRESOLVER_H #include "clang/AST/Decl.h" #include @@ -24,8 +24,6 @@ class NamedDecl; class Type; class UnresolvedUsingValueDecl; -namespace clangd { - // This class handles heuristic resolution of declarations and types in template // code. // @@ -80,7 +78,6 @@ class HeuristicResolver { ASTContext &Ctx; }; -} // namespace clangd } // namespace clang #endif diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index a656b5062391a..19cf3a2db00fd 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -19,6 +19,7 @@ add_clang_library(clangSema CodeCompleteConsumer.cpp DeclSpec.cpp DelayedDiagnostic.cpp + HeuristicResolver.cpp HLSLExternalSemaSource.cpp IdentifierResolver.cpp JumpDiagnostics.cpp diff --git a/clang-tools-extra/clangd/HeuristicResolver.cpp b/clang/lib/Sema/HeuristicResolver.cpp similarity index 99% rename from clang-tools-extra/clangd/HeuristicResolver.cpp rename to clang/lib/Sema/HeuristicResolver.cpp index 9eb892e8e4a8e..7c1b8450b9633 100644 --- a/clang-tools-extra/clangd/HeuristicResolver.cpp +++ b/clang/lib/Sema/HeuristicResolver.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "HeuristicResolver.h" +#include "clang/Sema/HeuristicResolver.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CXXInheritance.h" #include 
"clang/AST/DeclTemplate.h" @@ -14,7 +14,6 @@ #include "clang/AST/Type.h" namespace clang { -namespace clangd { namespace { @@ -466,5 +465,4 @@ const Type *HeuristicResolver::getPointeeType(const Type *T) const { return HeuristicResolverImpl(Ctx).getPointeeType(T); } -} // namespace clangd } // namespace clang diff --git a/clang/unittests/Sema/CMakeLists.txt b/clang/unittests/Sema/CMakeLists.txt index 7ded562e8edfa..17d39408000a4 100644 --- a/clang/unittests/Sema/CMakeLists.txt +++ b/clang/unittests/Sema/CMakeLists.txt @@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS add_clang_unittest(SemaTests ExternalSemaSourceTest.cpp CodeCompleteTest.cpp + HeuristicResolverTest.cpp GslOwnerPointerInference.cpp SemaLookupTest.cpp SemaNoloadLookupTest.cpp diff --git a/clang-tools-extra/clangd/unittests/HeuristicResolverTests.cpp b/clang/unittests/Sema/HeuristicResolverTest.cpp similarity index 99% rename from clang-tools-extra/clangd/unittests/HeuristicResolverTests.cpp rename to clang/unittests/Sema/HeuristicResolverTest.cpp index e4b3822fc7eb7..c4f054683ccdc 100644 --- a/clang-tools-extra/clangd/unittests/HeuristicResolverTests.cpp +++ b/clang/unittests/Sema/HeuristicResolverTest.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "HeuristicResolver.h" +#include "clang/Sema/HeuristicResolver.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Tooling/Tooling.h" @@ -13,7 +13,6 @@ #include "gtest/gtest.h" using namespace clang::ast_matchers; -using clang::clangd::HeuristicResolver; using testing::ElementsAre; namespace clang { From 358d65463b215a18e731b3a5494d51e1bcbd1356 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 17 Jan 2025 14:06:15 -0800 Subject: [PATCH 75/88] [SandboxVectorizer] Fix a warning This patch fixes: third-party/unittest/googletest/include/gtest/gtest.h:1379:11: error: comparison of 
integers of different signs: 'const unsigned int' and 'const int' [-Werror,-Wsign-compare] --- .../Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp index 37ce7c962f1d9..1d7c8f9cdde04 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp @@ -78,7 +78,7 @@ define void @foo(i8 %v0, i8 %v1, i8 %v2, i8 %v3, <2 x i8> %vec) { // Check callbacks: erase original instr. Add0->eraseFromParent(); EXPECT_FALSE(IMaps.getOrigLane(VAdd0, Add0)); - EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add1), 1); + EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add1), 1U); EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr); // Check callbacks: erase vector instr. VAdd0->eraseFromParent(); From 18eec97f092311373163216419a2e4606fe51ba2 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 17 Jan 2025 22:08:15 +0000 Subject: [PATCH 76/88] [gn build] Port ae932becb2c9 --- llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn | 1 - .../gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn | 1 - llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn | 1 + llvm/utils/gn/secondary/clang/unittests/Sema/BUILD.gn | 1 + 4 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index c6b45efef2990..b609d4a7462fb 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -105,7 +105,6 @@ static_library("clangd") { "GlobalCompilationDatabase.cpp", "HeaderSourceSwitch.cpp", "Headers.cpp", - "HeuristicResolver.cpp", "Hover.cpp", "IncludeCleaner.cpp", "IncludeFixer.cpp", diff --git 
a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn index c79d5ad662b7f..7deefe9dc0613 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn @@ -80,7 +80,6 @@ unittest("ClangdTests") { "GlobalCompilationDatabaseTests.cpp", "HeaderSourceSwitchTests.cpp", "HeadersTests.cpp", - "HeuristicResolverTests.cpp", "HoverTests.cpp", "IncludeCleanerTests.cpp", "IndexActionTests.cpp", diff --git a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn index 5b067cb382c14..3b72177ee5d7c 100644 --- a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn @@ -44,6 +44,7 @@ static_library("Sema") { "DeclSpec.cpp", "DelayedDiagnostic.cpp", "HLSLExternalSemaSource.cpp", + "HeuristicResolver.cpp", "IdentifierResolver.cpp", "JumpDiagnostics.cpp", "MultiplexExternalSemaSource.cpp", diff --git a/llvm/utils/gn/secondary/clang/unittests/Sema/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Sema/BUILD.gn index 943840796a6a3..8a10db4bcc089 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Sema/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Sema/BUILD.gn @@ -18,6 +18,7 @@ unittest("SemaTests") { "CodeCompleteTest.cpp", "ExternalSemaSourceTest.cpp", "GslOwnerPointerInference.cpp", + "HeuristicResolverTest.cpp", "SemaLookupTest.cpp", "SemaNoloadLookupTest.cpp", ] From 65cd9e4c2f85bd119eb039df1c90e8c97cbffb0c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 17 Jan 2025 22:17:11 +0000 Subject: [PATCH 77/88] [VPlan] Make VPValue constructors protected. (NFC) Tighten access to constructors similar to ef1260acc0. VPValues should either be constructed by constructors of recipes defining them or should be live-ins created by VPlan (via getOrAddLiveIn). 
--- llvm/lib/Transforms/Vectorize/VPlanValue.h | 18 +- .../Transforms/Vectorize/VPlanTest.cpp | 283 +++++++++--------- 2 files changed, 160 insertions(+), 141 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 7aaf4002b8b3e..23e39ce89a3a4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -33,9 +33,11 @@ namespace llvm { class raw_ostream; class Value; class VPDef; +struct VPDoubleValueDef; class VPSlotTracker; class VPUser; class VPRecipeBase; +class VPInterleaveRecipe; // This is the base class of the VPlan Def/Use graph, used for modeling the data // flow into, within and out of the VPlan. VPValues can stand for live-ins @@ -44,12 +46,15 @@ class VPRecipeBase; class VPValue { friend class VPBuilder; friend class VPDef; + friend struct VPDoubleValueDef; friend class VPInstruction; + friend class VPInterleaveRecipe; friend struct VPlanTransforms; friend class VPBasicBlock; friend class VPInterleavedAccessInfo; friend class VPSlotTracker; friend class VPRecipeBase; + friend class VPlan; const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). @@ -65,6 +70,13 @@ class VPValue { VPValue(const unsigned char SC, Value *UV = nullptr, VPDef *Def = nullptr); + /// Create a live-in VPValue. + VPValue(Value *UV = nullptr) : VPValue(VPValueSC, UV, nullptr) {} + /// Create a VPValue for a \p Def which is a subclass of VPValue. + VPValue(VPDef *Def, Value *UV = nullptr) : VPValue(VPVRecipeSC, UV, Def) {} + /// Create a VPValue for a \p Def which defines multiple values. + VPValue(Value *UV, VPDef *Def) : VPValue(VPValueSC, UV, Def) {} + // DESIGN PRINCIPLE: Access to the underlying IR must be strictly limited to // the front-end and back-end of VPlan so that the middle-end is as // independent as possible of the underlying IR. 
We grant access to the @@ -84,12 +96,6 @@ class VPValue { VPVRecipeSC /// A VPValue sub-class that is a VPRecipeBase. }; - /// Create a live-in VPValue. - VPValue(Value *UV = nullptr) : VPValue(VPValueSC, UV, nullptr) {} - /// Create a VPValue for a \p Def which is a subclass of VPValue. - VPValue(VPDef *Def, Value *UV = nullptr) : VPValue(VPVRecipeSC, UV, Def) {} - /// Create a VPValue for a \p Def which defines multiple values. - VPValue(Value *UV, VPDef *Def) : VPValue(VPValueSC, UV, Def) {} VPValue(const VPValue &) = delete; VPValue &operator=(const VPValue &) = delete; diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 92a0ffd000e54..73dde0af8afdd 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -19,6 +19,7 @@ #include namespace llvm { + namespace { #define CHECK_ITERATOR(Range1, ...) \ @@ -131,8 +132,9 @@ TEST_F(VPInstructionTest, moveBefore) { } TEST_F(VPInstructionTest, setOperand) { - VPValue *VPV1 = new VPValue(); - VPValue *VPV2 = new VPValue(); + IntegerType *Int32 = IntegerType::get(C, 32); + VPValue *VPV1 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *VPV2 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2)); VPInstruction *I1 = new VPInstruction(0, {VPV1, VPV2}); EXPECT_EQ(1u, VPV1->getNumUsers()); EXPECT_EQ(I1, *VPV1->user_begin()); @@ -140,7 +142,7 @@ TEST_F(VPInstructionTest, setOperand) { EXPECT_EQ(I1, *VPV2->user_begin()); // Replace operand 0 (VPV1) with VPV3. - VPValue *VPV3 = new VPValue(); + VPValue *VPV3 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3)); I1->setOperand(0, VPV3); EXPECT_EQ(0u, VPV1->getNumUsers()); EXPECT_EQ(1u, VPV2->getNumUsers()); @@ -157,7 +159,7 @@ TEST_F(VPInstructionTest, setOperand) { EXPECT_EQ(I1, *std::next(VPV3->user_begin())); // Replace operand 0 (VPV3) with VPV4. 
- VPValue *VPV4 = new VPValue(); + VPValue *VPV4 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 4)); I1->setOperand(0, VPV4); EXPECT_EQ(1u, VPV3->getNumUsers()); EXPECT_EQ(I1, *VPV3->user_begin()); @@ -170,19 +172,16 @@ TEST_F(VPInstructionTest, setOperand) { EXPECT_EQ(I1, *std::next(VPV4->user_begin())); delete I1; - delete VPV1; - delete VPV2; - delete VPV3; - delete VPV4; } TEST_F(VPInstructionTest, replaceAllUsesWith) { - VPValue *VPV1 = new VPValue(); - VPValue *VPV2 = new VPValue(); + IntegerType *Int32 = IntegerType::get(C, 32); + VPValue *VPV1 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *VPV2 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2)); VPInstruction *I1 = new VPInstruction(0, {VPV1, VPV2}); // Replace all uses of VPV1 with VPV3. - VPValue *VPV3 = new VPValue(); + VPValue *VPV3 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3)); VPV1->replaceAllUsesWith(VPV3); EXPECT_EQ(VPV3, I1->getOperand(0)); EXPECT_EQ(VPV2, I1->getOperand(1)); @@ -217,14 +216,12 @@ TEST_F(VPInstructionTest, replaceAllUsesWith) { delete I1; delete I2; - delete VPV1; - delete VPV2; - delete VPV3; } TEST_F(VPInstructionTest, releaseOperandsAtDeletion) { - VPValue *VPV1 = new VPValue(); - VPValue *VPV2 = new VPValue(); + IntegerType *Int32 = IntegerType::get(C, 32); + VPValue *VPV1 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *VPV2 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPInstruction *I1 = new VPInstruction(0, {VPV1, VPV2}); EXPECT_EQ(1u, VPV1->getNumUsers()); @@ -236,9 +233,6 @@ TEST_F(VPInstructionTest, releaseOperandsAtDeletion) { EXPECT_EQ(0u, VPV1->getNumUsers()); EXPECT_EQ(0u, VPV2->getNumUsers()); - - delete VPV1; - delete VPV2; } using VPBasicBlockTest = VPlanTestBase; @@ -869,9 +863,11 @@ No successors using VPRecipeTest = VPlanTestBase; TEST_F(VPRecipeTest, CastVPInstructionToVPUser) { - VPValue Op1; - VPValue Op2; - VPInstruction Recipe(Instruction::Add, {&Op1, &Op2}); + IntegerType *Int32 = 
IntegerType::get(C, 32); + VPlan &Plan = getPlan(); + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPInstruction Recipe(Instruction::Add, {Op1, Op2}); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -879,14 +875,15 @@ TEST_F(VPRecipeTest, CastVPInstructionToVPUser) { } TEST_F(VPRecipeTest, CastVPWidenRecipeToVPUser) { + VPlan &Plan = getPlan(); IntegerType *Int32 = IntegerType::get(C, 32); auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32), PoisonValue::get(Int32)); - VPValue Op1; - VPValue Op2; + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); SmallVector Args; - Args.push_back(&Op1); - Args.push_back(&Op1); + Args.push_back(Op1); + Args.push_back(Op2); VPWidenRecipe WidenR(*AI, make_range(Args.begin(), Args.end())); EXPECT_TRUE(isa(&WidenR)); VPRecipeBase *WidenRBase = &WidenR; @@ -896,17 +893,18 @@ TEST_F(VPRecipeTest, CastVPWidenRecipeToVPUser) { } TEST_F(VPRecipeTest, CastVPWidenCallRecipeToVPUserAndVPDef) { + VPlan &Plan = getPlan(); IntegerType *Int32 = IntegerType::get(C, 32); FunctionType *FTy = FunctionType::get(Int32, false); Function *Fn = Function::Create(FTy, GlobalValue::ExternalLinkage, 0); auto *Call = CallInst::Create(FTy, Fn); - VPValue Op1; - VPValue Op2; - VPValue CalledFn(Call->getCalledFunction()); + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *CalledFn = Plan.getOrAddLiveIn(Call->getCalledFunction()); SmallVector Args; - Args.push_back(&Op1); - Args.push_back(&Op2); - Args.push_back(&CalledFn); + Args.push_back(Op1); + Args.push_back(Op2); + Args.push_back(CalledFn); VPWidenCallRecipe Recipe(Call, Fn, Args); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; @@ -922,17 +920,18 @@ TEST_F(VPRecipeTest, 
CastVPWidenCallRecipeToVPUserAndVPDef) { } TEST_F(VPRecipeTest, CastVPWidenSelectRecipeToVPUserAndVPDef) { + VPlan &Plan = getPlan(); IntegerType *Int1 = IntegerType::get(C, 1); IntegerType *Int32 = IntegerType::get(C, 32); auto *SelectI = SelectInst::Create( PoisonValue::get(Int1), PoisonValue::get(Int32), PoisonValue::get(Int32)); - VPValue Op1; - VPValue Op2; - VPValue Op3; + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *Op3 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3)); SmallVector Args; - Args.push_back(&Op1); - Args.push_back(&Op2); - Args.push_back(&Op3); + Args.push_back(Op1); + Args.push_back(Op2); + Args.push_back(Op3); VPWidenSelectRecipe WidenSelectR(*SelectI, make_range(Args.begin(), Args.end())); EXPECT_TRUE(isa(&WidenSelectR)); @@ -948,15 +947,16 @@ TEST_F(VPRecipeTest, CastVPWidenSelectRecipeToVPUserAndVPDef) { } TEST_F(VPRecipeTest, CastVPWidenGEPRecipeToVPUserAndVPDef) { + VPlan &Plan = getPlan(); IntegerType *Int32 = IntegerType::get(C, 32); PointerType *Int32Ptr = PointerType::get(Int32, 0); auto *GEP = GetElementPtrInst::Create(Int32, PoisonValue::get(Int32Ptr), PoisonValue::get(Int32)); - VPValue Op1; - VPValue Op2; + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); SmallVector Args; - Args.push_back(&Op1); - Args.push_back(&Op2); + Args.push_back(Op1); + Args.push_back(Op2); VPWidenGEPRecipe Recipe(GEP, make_range(Args.begin(), Args.end())); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; @@ -971,15 +971,17 @@ TEST_F(VPRecipeTest, CastVPWidenGEPRecipeToVPUserAndVPDef) { } TEST_F(VPRecipeTest, CastVPBlendRecipeToVPUser) { + VPlan &Plan = getPlan(); IntegerType *Int32 = IntegerType::get(C, 32); auto *Phi = PHINode::Create(Int32, 1); - VPValue I1; - VPValue I2; - VPValue M2; + + VPValue *I1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *I2 = 
Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *M2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3)); SmallVector Args; - Args.push_back(&I1); - Args.push_back(&I2); - Args.push_back(&M2); + Args.push_back(I1); + Args.push_back(I2); + Args.push_back(M2); VPBlendRecipe Recipe(Phi, Args); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; @@ -988,10 +990,12 @@ TEST_F(VPRecipeTest, CastVPBlendRecipeToVPUser) { } TEST_F(VPRecipeTest, CastVPInterleaveRecipeToVPUser) { - VPValue Addr; - VPValue Mask; + VPlan &Plan = getPlan(); + IntegerType *Int32 = IntegerType::get(C, 32); + VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); InterleaveGroup IG(4, false, Align(4)); - VPInterleaveRecipe Recipe(&IG, &Addr, {}, &Mask, false); + VPInterleaveRecipe Recipe(&IG, Addr, {}, Mask, false); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -999,13 +1003,14 @@ TEST_F(VPRecipeTest, CastVPInterleaveRecipeToVPUser) { } TEST_F(VPRecipeTest, CastVPReplicateRecipeToVPUser) { - VPValue Op1; - VPValue Op2; + VPlan &Plan = getPlan(); + IntegerType *Int32 = IntegerType::get(C, 32); + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); SmallVector Args; - Args.push_back(&Op1); - Args.push_back(&Op2); + Args.push_back(Op1); + Args.push_back(Op2); - IntegerType *Int32 = IntegerType::get(C, 32); FunctionType *FTy = FunctionType::get(Int32, false); auto *Call = CallInst::Create(FTy, PoisonValue::get(FTy)); VPReplicateRecipe Recipe(Call, make_range(Args.begin(), Args.end()), true); @@ -1016,8 +1021,10 @@ TEST_F(VPRecipeTest, CastVPReplicateRecipeToVPUser) { } TEST_F(VPRecipeTest, CastVPBranchOnMaskRecipeToVPUser) { - VPValue Mask; - VPBranchOnMaskRecipe Recipe(&Mask); + VPlan &Plan = getPlan(); + IntegerType *Int32 = IntegerType::get(C, 32); + VPValue *Mask = 
Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPBranchOnMaskRecipe Recipe(Mask); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -1025,13 +1032,14 @@ TEST_F(VPRecipeTest, CastVPBranchOnMaskRecipeToVPUser) { } TEST_F(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) { + VPlan &Plan = getPlan(); IntegerType *Int32 = IntegerType::get(C, 32); PointerType *Int32Ptr = PointerType::get(Int32, 0); auto *Load = new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1)); - VPValue Addr; - VPValue Mask; - VPWidenLoadRecipe Recipe(*Load, &Addr, &Mask, true, false, {}); + VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {}); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -1048,15 +1056,16 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { IntegerType *Int1 = IntegerType::get(C, 1); IntegerType *Int32 = IntegerType::get(C, 32); PointerType *Int32Ptr = PointerType::get(Int32, 0); + VPlan &Plan = getPlan(); { auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32), PoisonValue::get(Int32)); - VPValue Op1; - VPValue Op2; + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); SmallVector Args; - Args.push_back(&Op1); - Args.push_back(&Op1); + Args.push_back(Op1); + Args.push_back(Op2); VPWidenRecipe Recipe(*AI, make_range(Args.begin(), Args.end())); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); @@ -1069,13 +1078,13 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { auto *SelectI = SelectInst::Create(PoisonValue::get(Int1), PoisonValue::get(Int32), PoisonValue::get(Int32)); - VPValue Op1; - VPValue Op2; - VPValue Op3; + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = 
Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *Op3 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3)); SmallVector Args; - Args.push_back(&Op1); - Args.push_back(&Op2); - Args.push_back(&Op3); + Args.push_back(Op1); + Args.push_back(Op2); + Args.push_back(Op3); VPWidenSelectRecipe Recipe(*SelectI, make_range(Args.begin(), Args.end())); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); @@ -1087,11 +1096,11 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { { auto *GEP = GetElementPtrInst::Create(Int32, PoisonValue::get(Int32Ptr), PoisonValue::get(Int32)); - VPValue Op1; - VPValue Op2; + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); SmallVector Args; - Args.push_back(&Op1); - Args.push_back(&Op2); + Args.push_back(Op1); + Args.push_back(Op2); VPWidenGEPRecipe Recipe(GEP, make_range(Args.begin(), Args.end())); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); @@ -1101,8 +1110,9 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { } { - VPValue Mask; - VPBranchOnMaskRecipe Recipe(&Mask); + VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + + VPBranchOnMaskRecipe Recipe(Mask); EXPECT_TRUE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1110,11 +1120,11 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { } { - VPValue ChainOp; - VPValue VecOp; - VPValue CondOp; - VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, &ChainOp, &CondOp, - &VecOp, false); + VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3)); + VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp, + VecOp, false); 
EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1122,13 +1132,13 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { } { - VPValue ChainOp; - VPValue VecOp; - VPValue CondOp; - VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, &ChainOp, &CondOp, - &VecOp, false); - VPValue EVL; - VPReductionEVLRecipe EVLRecipe(Recipe, EVL, &CondOp); + VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3)); + VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp, + VecOp, false); + VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 4)); + VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp); EXPECT_FALSE(EVLRecipe.mayHaveSideEffects()); EXPECT_FALSE(EVLRecipe.mayReadFromMemory()); EXPECT_FALSE(EVLRecipe.mayWriteToMemory()); @@ -1138,9 +1148,9 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { { auto *Load = new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1)); - VPValue Addr; - VPValue Mask; - VPWidenLoadRecipe Recipe(*Load, &Addr, &Mask, true, false, {}); + VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {}); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_TRUE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1151,10 +1161,10 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { { auto *Store = new StoreInst(PoisonValue::get(Int32), PoisonValue::get(Int32Ptr), false, Align(1)); - VPValue Addr; - VPValue Mask; - VPValue StoredV; - VPWidenStoreRecipe Recipe(*Store, &Addr, &StoredV, &Mask, false, false, {}); + VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Addr = 
Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *StoredV = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3)); + VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, {}); EXPECT_TRUE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_TRUE(Recipe.mayWriteToMemory()); @@ -1166,13 +1176,13 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { FunctionType *FTy = FunctionType::get(Int32, false); Function *Fn = Function::Create(FTy, GlobalValue::ExternalLinkage, 0); auto *Call = CallInst::Create(FTy, Fn); - VPValue Op1; - VPValue Op2; - VPValue CalledFn(Call->getCalledFunction()); + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *CalledFn = Plan.getOrAddLiveIn(Call->getCalledFunction()); SmallVector Args; - Args.push_back(&Op1); - Args.push_back(&Op2); - Args.push_back(&CalledFn); + Args.push_back(Op1); + Args.push_back(Op2); + Args.push_back(CalledFn); VPWidenCallRecipe Recipe(Call, Fn, Args); EXPECT_TRUE(Recipe.mayHaveSideEffects()); EXPECT_TRUE(Recipe.mayReadFromMemory()); @@ -1189,13 +1199,13 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { Intrinsic::getOrInsertDeclaration(&M, Intrinsic::thread_pointer); auto *Call = CallInst::Create(TheFn->getFunctionType(), TheFn); - VPValue Op1; - VPValue Op2; - VPValue CalledFn(TheFn); + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *CalledFn = Plan.getOrAddLiveIn(Call->getCalledFunction()); SmallVector Args; - Args.push_back(&Op1); - Args.push_back(&Op2); - Args.push_back(&CalledFn); + Args.push_back(Op1); + Args.push_back(Op2); + Args.push_back(CalledFn); VPWidenCallRecipe Recipe(Call, TheFn, Args); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); @@ -1205,21 +1215,20 @@ TEST_F(VPRecipeTest, 
MayHaveSideEffectsAndMayReadWriteMemory) { } { - VPValue Op1; - VPValue Op2; + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); InductionDescriptor IndDesc; - VPScalarIVStepsRecipe Recipe(IndDesc, &Op1, &Op2); + VPScalarIVStepsRecipe Recipe(IndDesc, Op1, Op2); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); EXPECT_FALSE(Recipe.mayReadOrWriteMemory()); } - // The initial implementation is conservative with respect to VPInstructions. { - VPValue Op1; - VPValue Op2; - VPInstruction VPInst(Instruction::Add, {&Op1, &Op2}); + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *Op2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPInstruction VPInst(Instruction::Add, {Op1, Op2}); VPRecipeBase &Recipe = VPInst; EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); @@ -1227,8 +1236,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { EXPECT_FALSE(Recipe.mayReadOrWriteMemory()); } { - VPValue Op1; - VPPredInstPHIRecipe Recipe(&Op1, {}); + VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPPredInstPHIRecipe Recipe(Op1, {}); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1397,8 +1406,8 @@ TEST_F(VPRecipeTest, dumpRecipeUnnamedVPValuesNotInPlanOrBlock) { auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32), PoisonValue::get(Int32)); AI->setName("a"); - VPValue *ExtVPV1 = new VPValue(ConstantInt::get(Int32, 1)); - VPValue *ExtVPV2 = new VPValue(AI); + VPValue *ExtVPV1 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *ExtVPV2 = getPlan().getOrAddLiveIn(AI); VPInstruction *I1 = new VPInstruction(Instruction::Add, {ExtVPV1, ExtVPV2}); VPInstruction *I2 = new VPInstruction(Instruction::Mul, {I1, I1}); @@ -1468,36 +1477,37 @@ 
TEST_F(VPRecipeTest, dumpRecipeUnnamedVPValuesNotInPlanOrBlock) { delete I2; delete I1; - delete ExtVPV2; - delete ExtVPV1; delete AI; } #endif TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) { - VPValue ChainOp; - VPValue VecOp; - VPValue CondOp; - VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, &ChainOp, &CondOp, - &VecOp, false); + IntegerType *Int32 = IntegerType::get(C, 32); + VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3)); + VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp, + VecOp, false); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); } TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) { - VPValue ChainOp; - VPValue VecOp; - VPValue CondOp; - VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, &ChainOp, &CondOp, - &VecOp, false); - VPValue EVL; - VPReductionEVLRecipe EVLRecipe(Recipe, EVL, &CondOp); + IntegerType *Int32 = IntegerType::get(C, 32); + VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1)); + VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2)); + VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3)); + VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp, + VecOp, false); + VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0)); + VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp); EXPECT_TRUE(isa(&EVLRecipe)); VPRecipeBase *BaseR = &EVLRecipe; EXPECT_TRUE(isa(BaseR)); } +} // namespace struct VPDoubleValueDef : public VPRecipeBase { VPDoubleValueDef(ArrayRef Operands) : VPRecipeBase(99, Operands) { @@ -1514,6 +1524,8 @@ struct VPDoubleValueDef : public VPRecipeBase { #endif }; +namespace { + TEST(VPDoubleValueDefTest, traverseUseLists) { // Check that the def-use chains of a multi-def can be traversed in 
both // directions. @@ -1559,8 +1571,9 @@ TEST(VPDoubleValueDefTest, traverseUseLists) { } TEST_F(VPRecipeTest, CastToVPSingleDefRecipe) { - VPValue Start; - VPEVLBasedIVPHIRecipe R(&Start, {}); + IntegerType *Int32 = IntegerType::get(C, 32); + VPValue *Start = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0)); + VPEVLBasedIVPHIRecipe R(Start, {}); VPRecipeBase *B = &R; EXPECT_TRUE(isa(B)); // TODO: check other VPSingleDefRecipes. From 0c6e03eea04b93984d5fe562f64f1ce31f5cca09 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 17 Jan 2025 14:22:25 -0800 Subject: [PATCH 78/88] [RISCV] Fold vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR, -1, MASK) (#123123) Co-authored-by: Brandon Wu --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 79 ++++++++++++++++-- .../RISCV/rvv/vp-combine-store-reverse.ll | 81 +++++++++++++++++++ 2 files changed, 153 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index de100c683a94f..33d67c9c407d8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1524,13 +1524,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}); if (Subtarget.hasVInstructions()) - setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER, - ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL, - ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR, - ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, - ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL, - ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, - ISD::INSERT_VECTOR_ELT, ISD::ABS, ISD::CTPOP, + setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, + ISD::MSCATTER, ISD::VP_GATHER, + ISD::VP_SCATTER, ISD::SRA, + ISD::SRL, ISD::SHL, + ISD::STORE, ISD::SPLAT_VECTOR, + 
ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, + ISD::VP_STORE, ISD::EXPERIMENTAL_VP_REVERSE, + ISD::MUL, ISD::SDIV, + ISD::UDIV, ISD::SREM, + ISD::UREM, ISD::INSERT_VECTOR_ELT, + ISD::ABS, ISD::CTPOP, ISD::VECTOR_SHUFFLE}); if (Subtarget.hasVendorXTHeadMemPair()) setTargetDAGCombine({ISD::LOAD, ISD::STORE}); @@ -16294,6 +16298,65 @@ static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, return Ret; } +static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + // Fold: + // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR, + // -1, MASK) + auto *VPStore = cast(N); + + if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE) + return SDValue(); + + SDValue VPReverse = VPStore->getValue(); + EVT ReverseVT = VPReverse->getValueType(0); + + // We do not have a strided_store version for masks, and the evl of vp.reverse + // and vp.store should always be the same. + if (!ReverseVT.getVectorElementType().isByteSized() || + VPStore->getVectorLength() != VPReverse.getOperand(2) || + !VPReverse.hasOneUse()) + return SDValue(); + + SDValue StoreMask = VPStore->getMask(); + // If Mask is all ones, then store is unmasked and can be reversed. + if (!isOneOrOneSplat(StoreMask)) { + // If the mask is not all ones, we can reverse the store if the mask was + // also reversed by an unmasked vp.reverse with the same EVL. 
+ if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE || + !isOneOrOneSplat(StoreMask.getOperand(1)) || + StoreMask.getOperand(2) != VPStore->getVectorLength()) + return SDValue(); + StoreMask = StoreMask.getOperand(0); + } + + // Base = StoreAddr + (NumElem - 1) * ElemWidthByte + SDLoc DL(N); + MVT XLenVT = Subtarget.getXLenVT(); + SDValue NumElem = VPStore->getVectorLength(); + uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8; + + SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem, + DAG.getConstant(1, DL, XLenVT)); + SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1, + DAG.getConstant(ElemWidthByte, DL, XLenVT)); + SDValue Base = + DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2); + SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT); + + MachineFunction &MF = DAG.getMachineFunction(); + MachinePointerInfo PtrInfo(VPStore->getAddressSpace()); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, VPStore->getMemOperand()->getFlags(), + LocationSize::beforeOrAfterPointer(), VPStore->getAlign()); + + return DAG.getStridedStoreVP( + VPStore->getChain(), DL, VPReverse.getOperand(0), Base, + VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(), + VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(), + VPStore->isTruncatingStore(), VPStore->isCompressingStore()); +} + // Convert from one FMA opcode to another based on whether we are negating the // multiply result and/or the accumulator. // NOTE: Only supports RVV operations with VL. 
@@ -18474,6 +18537,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, } case ISD::EXPERIMENTAL_VP_REVERSE: return performVP_REVERSECombine(N, DAG, Subtarget); + case ISD::VP_STORE: + return performVP_STORECombine(N, DAG, Subtarget); case ISD::BITCAST: { assert(Subtarget.useRVVForFixedLengthVectors()); SDValue N0 = N->getOperand(0); diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll new file mode 100644 index 0000000000000..4896a1367935a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+f,+v -verify-machineinstrs < %s | FileCheck %s + +define void @test_store_reverse_combiner( %val, * %ptr, i32 zeroext %evl) { +; CHECK-LABEL: test_store_reverse_combiner: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: li a2, -4 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsse32.v v8, (a0), a2 +; CHECK-NEXT: ret + %rev = call @llvm.experimental.vp.reverse.nxv2f32( %val, splat (i1 true), i32 %evl) + call void @llvm.vp.store.nxv2f32.p0nxv2f32( %rev, * %ptr, splat (i1 true), i32 %evl) + ret void +} + +define void @test_store_mask_is_vp_reverse( %val, * %ptr, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_store_mask_is_vp_reverse: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: li a2, -4 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsse32.v v8, (a0), a2, v0.t +; CHECK-NEXT: ret + %storemask = call @llvm.experimental.vp.reverse.nxv2i1( %mask, splat (i1 true), i32 %evl) + %rev = call @llvm.experimental.vp.reverse.nxv2f32( %val, splat (i1 true), i32 %evl) + call void @llvm.vp.store.nxv2f32.p0nxv2f32( %rev, * %ptr, %storemask, i32 %evl) + ret void +} + 
+define void @test_store_mask_not_all_one( %val, * %ptr, %notallones, i32 zeroext %evl) { +; CHECK-LABEL: test_store_mask_not_all_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vrsub.vx v9, v9, a1, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t +; CHECK-NEXT: vse32.v v10, (a0), v0.t +; CHECK-NEXT: ret + %rev = call @llvm.experimental.vp.reverse.nxv2f32( %val, %notallones, i32 %evl) + call void @llvm.vp.store.nxv2f32.p0nxv2f32( %rev, * %ptr, %notallones, i32 %evl) + ret void +} + +define void @test_different_evl( %val, * %ptr, %mask, i32 zeroext %evl1, i32 zeroext %evl2) { +; CHECK-LABEL: test_different_evl: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vrsub.vx v9, v9, a1 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vrsub.vx v11, v11, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v12, v10, v9 +; CHECK-NEXT: vmsne.vi v0, v12, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vrgather.vv v9, v8, v11 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; CHECK-NEXT: vse32.v v9, (a0), v0.t +; CHECK-NEXT: ret + %storemask = call @llvm.experimental.vp.reverse.nxv2i1( %mask, splat (i1 true), i32 %evl1) + %rev = call @llvm.experimental.vp.reverse.nxv2f32( %val, splat (i1 true), i32 %evl1) + call void @llvm.vp.store.nxv2f32.p0nxv2f32( %rev, * %ptr, %storemask, i32 %evl2) + ret void +} + +declare @llvm.experimental.vp.reverse.nxv2f32(, , i32) +declare @llvm.experimental.vp.reverse.nxv2i1(, , i32) +declare void @llvm.vp.store.nxv2f32.p0nxv2f32(, * nocapture, , 
i32) From 2523d3b1024bac7aa2efb6740a136bbf2263994e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Fri, 17 Jan 2025 14:34:47 -0800 Subject: [PATCH 79/88] [flang][cuda] Perform scalar assignment of c_devptr inlined (#123407) Because `c_devptr` has a `c_ptr` field, every assignment was done via the Assign runtime function. This leads to stack overflows on the device and excessive memory use. Since a c_devptr can be copied directly on assignment, make it a special case. --- .../include/flang/Optimizer/Dialect/FIRType.h | 7 +++++ flang/lib/Optimizer/Builder/FIRBuilder.cpp | 4 +++ flang/test/Lower/CUDA/cuda-devptr.cuf | 30 ++++++++++++++++--- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h index 78257ab703086..e19fcde8d0e64 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRType.h +++ b/flang/include/flang/Optimizer/Dialect/FIRType.h @@ -139,6 +139,13 @@ inline bool isa_builtin_cptr_type(mlir::Type t) { return false; } +// Is `t` type(c_devptr)? +inline bool isa_builtin_c_devptr_type(mlir::Type t) { + if (auto recTy = mlir::dyn_cast_or_null(t)) + return recTy.getName().ends_with("T__builtin_c_devptr"); + return false; +} + /// Is `t` type(c_devptr)? 
inline bool isa_builtin_cdevptr_type(mlir::Type t) { if (auto recTy = mlir::dyn_cast_or_null(t)) diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index ad1244ef99b41..64c540cfb95ae 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -1401,6 +1401,10 @@ static void genComponentByComponentAssignment(fir::FirOpBuilder &builder, /// Can the assignment of this record type be implement with a simple memory /// copy (it requires no deep copy or user defined assignment of components )? static bool recordTypeCanBeMemCopied(fir::RecordType recordType) { + // c_devptr type is a special case. It has a nested c_ptr field but we know it + // can be copied directly. + if (fir::isa_builtin_c_devptr_type(recordType)) + return true; if (fir::hasDynamicSize(recordType)) return false; for (auto [_, fieldType] : recordType.getTypeList()) { diff --git a/flang/test/Lower/CUDA/cuda-devptr.cuf b/flang/test/Lower/CUDA/cuda-devptr.cuf index 561d92ecd3e2e..d61d84d9bc750 100644 --- a/flang/test/Lower/CUDA/cuda-devptr.cuf +++ b/flang/test/Lower/CUDA/cuda-devptr.cuf @@ -1,4 +1,4 @@ -! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s +! RUN: bbc -emit-fir -hlfir -fcuda %s -o - | FileCheck %s ! Test CUDA Fortran specific type @@ -37,12 +37,34 @@ subroutine sub2() end ! CHECK-LABEL: func.func @_QPsub2() -! CHECK: %[[X:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub2Ex"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[X:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub2Ex"} : (!fir.ref>>>) -> !fir.ref>>> ! CHECK: %[[CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{{[<]?}}{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}{{[>]?}}> -! 
CHECK: %[[CPTR_COORD:.*]] = fir.coordinate_of %{{.*}}#1, %[[CPTR]] : (!fir.ref}{{[>]?}}>>, !fir.field) -> !fir.ref> +! CHECK: %[[CPTR_COORD:.*]] = fir.coordinate_of %{{.*}}, %[[CPTR]] : (!fir.ref}{{[>]?}}>>, !fir.field) -> !fir.ref> ! CHECK: %[[ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK: %[[ADDRESS_COORD:.*]] = fir.coordinate_of %[[CPTR_COORD]], %[[ADDRESS]] : (!fir.ref>, !fir.field) -> !fir.ref ! CHECK: %[[ADDRESS_LOADED:.*]] = fir.load %[[ADDRESS_COORD]] : !fir.ref ! CHECK: %[[ADDRESS_IDX:.*]] = fir.convert %[[ADDRESS_LOADED]] : (i64) -> !fir.ptr> ! CHECK: %[[EMBOX:.*]] = fir.embox %[[ADDRESS_IDX]](%{{.*}}) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> -! CHECK: fir.store %[[EMBOX]] to %[[X]]#1 : !fir.ref>>> +! CHECK: fir.store %[[EMBOX]] to %[[X]] : !fir.ref>>> + +attributes(global) subroutine assign_c_devptr(p, a) + use __fortran_builtins, only: c_devloc => __builtin_c_devloc + use __fortran_builtins, only: c_devptr => __builtin_c_devptr + type (c_devptr), device :: p + complex :: a(10) + p = c_devloc(a(1)) +end subroutine + +! CHECK-LABEL: func.func @_QPassign_c_devptr +! CHECK: %[[P:.*]] = fir.declare %arg0 dummy_scope %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFassign_c_devptrEp"} +! CHECK: %[[C_DEVLOC_RES:.*]] = fir.declare %15 {uniq_name = ".tmp.intrinsic_result"} : (!fir.ref}>>) -> !fir.ref}>> +! CHECK: %[[CPTR_FIELD:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> +! CHECK: %[[RES_CPTR_COORD:.*]] = fir.coordinate_of %[[C_DEVLOC_RES]], %[[CPTR_FIELD]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! CHECK: %[[CPTR_FIELD:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}> +! CHECK: %[[P_CPTR_COORD:.*]] = fir.coordinate_of %[[P]], %[[CPTR_FIELD]] : (!fir.ref}>>, !fir.field) -> !fir.ref> +! 
CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> +! CHECK: %[[RES_ADDR_COORD:.*]] = fir.coordinate_of %[[RES_CPTR_COORD]], %[[ADDRESS_FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> +! CHECK: %[[P_ADDR_COORD:.*]] = fir.coordinate_of %[[P_CPTR_COORD]], %[[ADDRESS_FIELD]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[ADDR:.*]] = fir.load %[[RES_ADDR_COORD]] : !fir.ref +! CHECK: fir.store %[[ADDR]] to %[[P_ADDR_COORD]] : !fir.ref From 22d4ff155aadf0f098dd5dc48d9038da15108937 Mon Sep 17 00:00:00 2001 From: vporpo Date: Fri, 17 Jan 2025 14:37:29 -0800 Subject: [PATCH 80/88] [SandboxIR] Fix CmpInst::create() when it gets folded (#123408) If the operands of a CmpInst are constants then it gets folded into a constant. Therefore CmpInst::create() should return a Value*, not a CmpInst*, and should handle the creation of the constant correctly. 
--- llvm/include/llvm/SandboxIR/Instruction.h | 13 ++++---- llvm/lib/SandboxIR/Instruction.cpp | 33 +++++++++++--------- llvm/unittests/SandboxIR/SandboxIRTest.cpp | 35 +++++++++++++++++----- 3 files changed, 53 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/SandboxIR/Instruction.h b/llvm/include/llvm/SandboxIR/Instruction.h index 34a7feb63bec4..49ea6707ecd82 100644 --- a/llvm/include/llvm/SandboxIR/Instruction.h +++ b/llvm/include/llvm/SandboxIR/Instruction.h @@ -2478,13 +2478,12 @@ class CmpInst : public SingleLLVMInstructionImpl { public: using Predicate = llvm::CmpInst::Predicate; - static CmpInst *create(Predicate Pred, Value *S1, Value *S2, - InsertPosition Pos, Context &Ctx, - const Twine &Name = ""); - static CmpInst *createWithCopiedFlags(Predicate Pred, Value *S1, Value *S2, - const Instruction *FlagsSource, - InsertPosition Pos, Context &Ctx, - const Twine &Name = ""); + static Value *create(Predicate Pred, Value *S1, Value *S2, InsertPosition Pos, + Context &Ctx, const Twine &Name = ""); + static Value *createWithCopiedFlags(Predicate Pred, Value *S1, Value *S2, + const Instruction *FlagsSource, + InsertPosition Pos, Context &Ctx, + const Twine &Name = ""); void setPredicate(Predicate P); void swapOperands(); diff --git a/llvm/lib/SandboxIR/Instruction.cpp b/llvm/lib/SandboxIR/Instruction.cpp index 0a7cd95124bb5..cc961418600e3 100644 --- a/llvm/lib/SandboxIR/Instruction.cpp +++ b/llvm/lib/SandboxIR/Instruction.cpp @@ -926,21 +926,26 @@ void PHINode::removeIncomingValueIf(function_ref Predicate) { } } -CmpInst *CmpInst::create(Predicate P, Value *S1, Value *S2, InsertPosition Pos, - Context &Ctx, const Twine &Name) { +Value *CmpInst::create(Predicate P, Value *S1, Value *S2, InsertPosition Pos, + Context &Ctx, const Twine &Name) { auto &Builder = setInsertPos(Pos); - auto *LLVMI = Builder.CreateCmp(P, S1->Val, S2->Val, Name); - if (dyn_cast(LLVMI)) - return Ctx.createICmpInst(cast(LLVMI)); - return Ctx.createFCmpInst(cast(LLVMI)); -} 
-CmpInst *CmpInst::createWithCopiedFlags(Predicate P, Value *S1, Value *S2, - const Instruction *F, - InsertPosition Pos, Context &Ctx, - const Twine &Name) { - CmpInst *Inst = create(P, S1, S2, Pos, Ctx, Name); - cast(Inst->Val)->copyIRFlags(F->Val); - return Inst; + auto *LLVMV = Builder.CreateCmp(P, S1->Val, S2->Val, Name); + // It may have been folded into a constant. + if (auto *LLVMC = dyn_cast(LLVMV)) + return Ctx.getOrCreateConstant(LLVMC); + if (isa(LLVMV)) + return Ctx.createICmpInst(cast(LLVMV)); + return Ctx.createFCmpInst(cast(LLVMV)); +} + +Value *CmpInst::createWithCopiedFlags(Predicate P, Value *S1, Value *S2, + const Instruction *F, InsertPosition Pos, + Context &Ctx, const Twine &Name) { + Value *V = create(P, S1, S2, Pos, Ctx, Name); + if (auto *C = dyn_cast(V)) + return C; + cast(V->Val)->copyIRFlags(F->Val); + return V; } Type *CmpInst::makeCmpResultType(Type *OpndType) { diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 874c32c2d4398..73e8ef283fc2a 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -5841,9 +5841,9 @@ define void @foo(i32 %i0, i32 %i1) { EXPECT_EQ(ICmp->getSignedPredicate(), LLVMICmp->getSignedPredicate()); EXPECT_EQ(ICmp->getUnsignedPredicate(), LLVMICmp->getUnsignedPredicate()); } - auto *NewCmp = + auto *NewCmp = cast( sandboxir::CmpInst::create(llvm::CmpInst::ICMP_ULE, F.getArg(0), - F.getArg(1), BB->begin(), Ctx, "NewCmp"); + F.getArg(1), BB->begin(), Ctx, "NewCmp")); EXPECT_EQ(NewCmp, &*BB->begin()); EXPECT_EQ(NewCmp->getPredicate(), llvm::CmpInst::ICMP_ULE); EXPECT_EQ(NewCmp->getOperand(0), F.getArg(0)); @@ -5856,6 +5856,16 @@ define void @foo(i32 %i0, i32 %i1) { sandboxir::Type *RT = sandboxir::CmpInst::makeCmpResultType(F.getArg(0)->getType()); EXPECT_TRUE(RT->isIntegerTy(1)); // Only one bit in a single comparison + + { + // Check create() when operands are constant. 
+ auto *Const42 = + sandboxir::ConstantInt::get(sandboxir::Type::getInt32Ty(Ctx), 42); + auto *NewConstCmp = + sandboxir::CmpInst::create(llvm::CmpInst::ICMP_ULE, Const42, Const42, + BB->begin(), Ctx, "NewConstCmp"); + EXPECT_TRUE(isa(NewConstCmp)); + } } TEST_F(SandboxIRTest, FCmpInst) { @@ -5906,8 +5916,8 @@ define void @foo(float %f0, float %f1) { CopyFrom->setFastMathFlags(FastMathFlags::getFast()); // create with default flags - auto *NewFCmp = sandboxir::CmpInst::create( - llvm::CmpInst::FCMP_ONE, F.getArg(0), F.getArg(1), It1, Ctx, "NewFCmp"); + auto *NewFCmp = cast(sandboxir::CmpInst::create( + llvm::CmpInst::FCMP_ONE, F.getArg(0), F.getArg(1), It1, Ctx, "NewFCmp")); EXPECT_EQ(NewFCmp->getPredicate(), llvm::CmpInst::FCMP_ONE); EXPECT_EQ(NewFCmp->getOperand(0), F.getArg(0)); EXPECT_EQ(NewFCmp->getOperand(1), F.getArg(1)); @@ -5917,9 +5927,10 @@ define void @foo(float %f0, float %f1) { FastMathFlags DefaultFMF = NewFCmp->getFastMathFlags(); EXPECT_TRUE(CopyFrom->getFastMathFlags() != DefaultFMF); // create with copied flags - auto *NewFCmpFlags = sandboxir::CmpInst::createWithCopiedFlags( - llvm::CmpInst::FCMP_ONE, F.getArg(0), F.getArg(1), CopyFrom, It1, Ctx, - "NewFCmpFlags"); + auto *NewFCmpFlags = + cast(sandboxir::CmpInst::createWithCopiedFlags( + llvm::CmpInst::FCMP_ONE, F.getArg(0), F.getArg(1), CopyFrom, It1, Ctx, + "NewFCmpFlags")); EXPECT_FALSE(NewFCmpFlags->getFastMathFlags() != CopyFrom->getFastMathFlags()); EXPECT_EQ(NewFCmpFlags->getPredicate(), llvm::CmpInst::FCMP_ONE); @@ -5928,6 +5939,16 @@ define void @foo(float %f0, float %f1) { #ifndef NDEBUG EXPECT_EQ(NewFCmpFlags->getName(), "NewFCmpFlags"); #endif // NDEBUG + + { + // Check create() when operands are constant. 
+ auto *Const42 = + sandboxir::ConstantFP::get(sandboxir::Type::getFloatTy(Ctx), 42.0); + auto *NewConstCmp = + sandboxir::CmpInst::create(llvm::CmpInst::FCMP_ULE, Const42, Const42, + BB->begin(), Ctx, "NewConstCmp"); + EXPECT_TRUE(isa(NewConstCmp)); + } } TEST_F(SandboxIRTest, UnreachableInst) { From 87e4b68195adc81fae40a4fa27e33458a9586fe5 Mon Sep 17 00:00:00 2001 From: vporpo Date: Fri, 17 Jan 2025 15:48:24 -0800 Subject: [PATCH 81/88] [SandboxVec][Legality] Implement ShuffleMask (#123404) This patch implements a helper ShuffleMask data structure that helps describe shuffles of elements across lanes. --- .../Vectorize/SandboxVectorizer/Legality.h | 89 +++++++++++++++-- .../SandboxVectorizer/Passes/BottomUpVec.h | 2 + .../Vectorize/SandboxVectorizer/Legality.cpp | 14 ++- .../SandboxVectorizer/Passes/BottomUpVec.cpp | 13 +++ .../SandboxVectorizer/bottomup_basic.ll | 21 ++++ .../SandboxVectorizer/LegalityTest.cpp | 97 ++++++++++++++++++- 6 files changed, 219 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index c03e7a10397ad..4858ebaf0770a 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -25,10 +25,62 @@ class LegalityAnalysis; class Value; class InstrMaps; +class ShuffleMask { +public: + using IndicesVecT = SmallVector; + +private: + IndicesVecT Indices; + +public: + ShuffleMask(SmallVectorImpl &&Indices) : Indices(std::move(Indices)) {} + ShuffleMask(std::initializer_list Indices) : Indices(Indices) {} + explicit ShuffleMask(ArrayRef Indices) : Indices(Indices) {} + operator ArrayRef() const { return Indices; } + /// Creates and returns an identity shuffle mask of size \p Sz. + /// For example if Sz == 4 the returned mask is {0, 1, 2, 3}. 
+ static ShuffleMask getIdentity(unsigned Sz) { + IndicesVecT Indices; + Indices.reserve(Sz); + for (auto Idx : seq(0, (int)Sz)) + Indices.push_back(Idx); + return ShuffleMask(std::move(Indices)); + } + /// \Returns true if the mask is a perfect identity mask with consecutive + /// indices, i.e., performs no lane shuffling, like 0,1,2,3... + bool isIdentity() const { + for (auto [Idx, Elm] : enumerate(Indices)) { + if ((int)Idx != Elm) + return false; + } + return true; + } + bool operator==(const ShuffleMask &Other) const { + return Indices == Other.Indices; + } + bool operator!=(const ShuffleMask &Other) const { return !(*this == Other); } + size_t size() const { return Indices.size(); } + int operator[](int Idx) const { return Indices[Idx]; } + using const_iterator = IndicesVecT::const_iterator; + const_iterator begin() const { return Indices.begin(); } + const_iterator end() const { return Indices.end(); } +#ifndef NDEBUG + friend raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &Mask) { + Mask.print(OS); + return OS; + } + void print(raw_ostream &OS) const { + interleave(Indices, OS, [&OS](auto Elm) { OS << Elm; }, ","); + } + LLVM_DUMP_METHOD void dump() const; +#endif +}; + enum class LegalityResultID { - Pack, ///> Collect scalar values. - Widen, ///> Vectorize by combining scalars to a vector. - DiamondReuse, ///> Don't generate new code, reuse existing vector. + Pack, ///> Collect scalar values. + Widen, ///> Vectorize by combining scalars to a vector. + DiamondReuse, ///> Don't generate new code, reuse existing vector. + DiamondReuseWithShuffle, ///> Reuse the existing vector but add a shuffle. }; /// The reason for vectorizing or not vectorizing. 
@@ -54,6 +106,8 @@ struct ToStr { return "Widen"; case LegalityResultID::DiamondReuse: return "DiamondReuse"; + case LegalityResultID::DiamondReuseWithShuffle: + return "DiamondReuseWithShuffle"; } llvm_unreachable("Unknown LegalityResultID enum"); } @@ -154,6 +208,22 @@ class DiamondReuse final : public LegalityResult { Value *getVector() const { return Vec; } }; +class DiamondReuseWithShuffle final : public LegalityResult { + friend class LegalityAnalysis; + Value *Vec; + ShuffleMask Mask; + DiamondReuseWithShuffle(Value *Vec, const ShuffleMask &Mask) + : LegalityResult(LegalityResultID::DiamondReuseWithShuffle), Vec(Vec), + Mask(Mask) {} + +public: + static bool classof(const LegalityResult *From) { + return From->getSubclassID() == LegalityResultID::DiamondReuseWithShuffle; + } + Value *getVector() const { return Vec; } + const ShuffleMask &getMask() const { return Mask; } +}; + class Pack final : public LegalityResultWithReason { Pack(ResultReason Reason) : LegalityResultWithReason(LegalityResultID::Pack, Reason) {} @@ -192,23 +262,22 @@ class CollectDescr { CollectDescr(SmallVectorImpl &&Descrs) : Descrs(std::move(Descrs)) {} /// If all elements come from a single vector input, then return that vector - /// and whether we need a shuffle to get them in order. - std::optional> getSingleInput() const { + /// and also the shuffle mask required to get them in order. 
+ std::optional> getSingleInput() const { const auto &Descr0 = *Descrs.begin(); Value *V0 = Descr0.getValue(); if (!Descr0.needsExtract()) return std::nullopt; - bool NeedsShuffle = Descr0.getExtractIdx() != 0; - int Lane = 1; + ShuffleMask::IndicesVecT MaskIndices; + MaskIndices.push_back(Descr0.getExtractIdx()); for (const auto &Descr : drop_begin(Descrs)) { if (!Descr.needsExtract()) return std::nullopt; if (Descr.getValue() != V0) return std::nullopt; - if (Descr.getExtractIdx() != Lane++) - NeedsShuffle = true; + MaskIndices.push_back(Descr.getExtractIdx()); } - return std::make_pair(V0, NeedsShuffle); + return std::make_pair(V0, ShuffleMask(std::move(MaskIndices))); } bool hasVectorInputs() const { return any_of(Descrs, [](const auto &D) { return D.needsExtract(); }); diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h index dd3012f7c9b55..ac051c3b6570f 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h @@ -36,6 +36,8 @@ class BottomUpVec final : public FunctionPass { /// Erases all dead instructions from the dead instruction candidates /// collected during vectorization. void tryEraseDeadInstrs(); + /// Creates a shuffle instruction that shuffles \p VecOp according to \p Mask. + Value *createShuffle(Value *VecOp, const ShuffleMask &Mask); /// Packs all elements of \p ToPack into a vector and returns that vector. 
Value *createPack(ArrayRef ToPack); void collectPotentiallyDeadInstrs(ArrayRef Bndl); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index f8149c5bc6636..ad3e38e2f1d92 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -20,6 +20,11 @@ namespace llvm::sandboxir { #define DEBUG_TYPE "SBVec:Legality" #ifndef NDEBUG +void ShuffleMask::dump() const { + print(dbgs()); + dbgs() << "\n"; +} + void LegalityResult::dump() const { print(dbgs()); dbgs() << "\n"; @@ -213,13 +218,12 @@ const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl, auto CollectDescrs = getHowToCollectValues(Bndl); if (CollectDescrs.hasVectorInputs()) { if (auto ValueShuffleOpt = CollectDescrs.getSingleInput()) { - auto [Vec, NeedsShuffle] = *ValueShuffleOpt; - if (!NeedsShuffle) + auto [Vec, Mask] = *ValueShuffleOpt; + if (Mask.isIdentity()) return createLegalityResult(Vec); - llvm_unreachable("TODO: Unimplemented"); - } else { - llvm_unreachable("TODO: Unimplemented"); + return createLegalityResult(Vec, Mask); } + llvm_unreachable("TODO: Unimplemented"); } if (auto ReasonOpt = notVectorizableBasedOnOpcodesAndTypes(Bndl)) diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index b8e2697839a3c..d62023ea01884 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -179,6 +179,12 @@ void BottomUpVec::tryEraseDeadInstrs() { DeadInstrCandidates.clear(); } +Value *BottomUpVec::createShuffle(Value *VecOp, const ShuffleMask &Mask) { + BasicBlock::iterator WhereIt = getInsertPointAfterInstrs({VecOp}); + return ShuffleVectorInst::create(VecOp, VecOp, Mask, WhereIt, + VecOp->getContext(), "VShuf"); +} + Value 
*BottomUpVec::createPack(ArrayRef ToPack) { BasicBlock::iterator WhereIt = getInsertPointAfterInstrs(ToPack); @@ -295,6 +301,13 @@ Value *BottomUpVec::vectorizeRec(ArrayRef Bndl, unsigned Depth) { NewVec = cast(LegalityRes).getVector(); break; } + case LegalityResultID::DiamondReuseWithShuffle: { + auto *VecOp = cast(LegalityRes).getVector(); + const ShuffleMask &Mask = + cast(LegalityRes).getMask(); + NewVec = createShuffle(VecOp, Mask); + break; + } case LegalityResultID::Pack: { // If we can't vectorize the seeds then just return. if (Depth == 0) diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll index 7bc6e5ac3d760..a3798af839908 100644 --- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll +++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll @@ -221,3 +221,24 @@ define void @diamond(ptr %ptr) { store float %sub1, ptr %ptr1 ret void } + +define void @diamondWithShuffle(ptr %ptr) { +; CHECK-LABEL: define void @diamondWithShuffle( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4 +; CHECK-NEXT: [[VSHUF:%.*]] = shufflevector <2 x float> [[VECL]], <2 x float> [[VECL]], <2 x i32> +; CHECK-NEXT: [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VSHUF]] +; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4 +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr float, ptr %ptr, i32 0 + %ptr1 = getelementptr float, ptr %ptr, i32 1 + %ld0 = load float, ptr %ptr0 + %ld1 = load float, ptr %ptr1 + %sub0 = fsub float %ld0, %ld1 + %sub1 = fsub float %ld1, %ld0 + store float %sub0, ptr %ptr0 + store float %sub1, ptr %ptr1 + ret void +} diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 069bfdba0a7cd..b421d08bc6b02 100644 --- 
a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -19,6 +19,7 @@ #include "llvm/SandboxIR/Instruction.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" using namespace llvm; @@ -321,7 +322,7 @@ define void @foo(ptr %ptr) { sandboxir::CollectDescr CD(std::move(Descrs)); EXPECT_TRUE(CD.getSingleInput()); EXPECT_EQ(CD.getSingleInput()->first, VLd); - EXPECT_EQ(CD.getSingleInput()->second, false); + EXPECT_THAT(CD.getSingleInput()->second, testing::ElementsAre(0, 1)); EXPECT_TRUE(CD.hasVectorInputs()); } { @@ -331,7 +332,7 @@ define void @foo(ptr %ptr) { sandboxir::CollectDescr CD(std::move(Descrs)); EXPECT_TRUE(CD.getSingleInput()); EXPECT_EQ(CD.getSingleInput()->first, VLd); - EXPECT_EQ(CD.getSingleInput()->second, true); + EXPECT_THAT(CD.getSingleInput()->second, testing::ElementsAre(1, 0)); EXPECT_TRUE(CD.hasVectorInputs()); } { @@ -352,3 +353,95 @@ define void @foo(ptr %ptr) { EXPECT_FALSE(CD.hasVectorInputs()); } } + +TEST_F(LegalityTest, ShuffleMask) { + { + // Check SmallVector constructor. + SmallVector Indices({0, 1, 2, 3}); + sandboxir::ShuffleMask Mask(std::move(Indices)); + EXPECT_THAT(Mask, testing::ElementsAre(0, 1, 2, 3)); + } + { + // Check initializer_list constructor. + sandboxir::ShuffleMask Mask({0, 1, 2, 3}); + EXPECT_THAT(Mask, testing::ElementsAre(0, 1, 2, 3)); + } + { + // Check ArrayRef constructor. + sandboxir::ShuffleMask Mask(ArrayRef({0, 1, 2, 3})); + EXPECT_THAT(Mask, testing::ElementsAre(0, 1, 2, 3)); + } + { + // Check operator ArrayRef(). + sandboxir::ShuffleMask Mask({0, 1, 2, 3}); + ArrayRef Array = Mask; + EXPECT_THAT(Array, testing::ElementsAre(0, 1, 2, 3)); + } + { + // Check getIdentity(). 
+ auto IdentityMask = sandboxir::ShuffleMask::getIdentity(4); + EXPECT_THAT(IdentityMask, testing::ElementsAre(0, 1, 2, 3)); + EXPECT_TRUE(IdentityMask.isIdentity()); + } + { + // Check isIdentity(). + sandboxir::ShuffleMask Mask1({0, 1, 2, 3}); + EXPECT_TRUE(Mask1.isIdentity()); + sandboxir::ShuffleMask Mask2({1, 2, 3, 4}); + EXPECT_FALSE(Mask2.isIdentity()); + } + { + // Check operator==(). + sandboxir::ShuffleMask Mask1({0, 1, 2, 3}); + sandboxir::ShuffleMask Mask2({0, 1, 2, 3}); + EXPECT_TRUE(Mask1 == Mask2); + EXPECT_FALSE(Mask1 != Mask2); + } + { + // Check operator!=(). + sandboxir::ShuffleMask Mask1({0, 1, 2, 3}); + sandboxir::ShuffleMask Mask2({0, 1, 2, 4}); + EXPECT_TRUE(Mask1 != Mask2); + EXPECT_FALSE(Mask1 == Mask2); + } + { + // Check size(). + sandboxir::ShuffleMask Mask({0, 1, 2, 3}); + EXPECT_EQ(Mask.size(), 4u); + } + { + // Check operator[]. + sandboxir::ShuffleMask Mask({0, 1, 2, 3}); + for (auto [Idx, Elm] : enumerate(Mask)) { + EXPECT_EQ(Elm, Mask[Idx]); + } + } + { + // Check begin(), end(). + sandboxir::ShuffleMask Mask({0, 1, 2, 3}); + sandboxir::ShuffleMask::const_iterator Begin = Mask.begin(); + sandboxir::ShuffleMask::const_iterator End = Mask.end(); + int Idx = 0; + for (auto It = Begin; It != End; ++It) { + EXPECT_EQ(*It, Mask[Idx++]); + } + } +#ifndef NDEBUG + { + // Check print(OS). + sandboxir::ShuffleMask Mask({0, 1, 2, 3}); + std::string Str; + raw_string_ostream OS(Str); + Mask.print(OS); + EXPECT_EQ(Str, "0,1,2,3"); + } + { + // Check operator<<(). + sandboxir::ShuffleMask Mask({0, 1, 2, 3}); + std::string Str; + raw_string_ostream OS(Str); + OS << Mask; + EXPECT_EQ(Str, "0,1,2,3"); + } +#endif // NDEBUG +} From 9fdc38c81c7d1b61cb0750e5f5b273d6d1877513 Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Fri, 17 Jan 2025 17:26:44 -0800 Subject: [PATCH 82/88] [WebAssembly][Object] Support more elem segment flags (#123427) Some tools (e.g. 
Rust tooling) produce element segment descriptors with neither elemkind nor element type descriptors, but with init exprs instead of func indices (this is with the flags value of 4 in https://webassembly.github.io/spec/core/binary/modules.html#element-section). LLVM doesn't fully model reference types or the various ways to initialize element segments, but we do want to correctly parse and skip over all type sections, so this change updates the object parser to handle that case, and refactors for more clarity. The test file is updated to include one additional elem segment with a flags value of 4, an initializer value of (i32.const 0) and an empty vector. Also support parsing files that export imported (undefined) functions. --- lld/wasm/SyntheticSections.cpp | 2 +- llvm/include/llvm/BinaryFormat/Wasm.h | 6 ++- llvm/lib/MC/WasmObjectWriter.cpp | 2 +- llvm/lib/Object/WasmObjectFile.cpp | 41 ++++++++++++------ llvm/lib/ObjectYAML/WasmEmitter.cpp | 2 +- llvm/lib/ObjectYAML/WasmYAML.cpp | 2 +- llvm/test/Object/Inputs/WASM/multi-table.wasm | Bin 185 -> 190 bytes 7 files changed, 36 insertions(+), 19 deletions(-) diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 715fba1ee6da5..7fb44b9f0c009 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -594,7 +594,7 @@ void ElemSection::writeBody() { } writeInitExpr(os, initExpr); - if (flags & WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) { + if (flags & WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) { // We only write active function table initializers, for which the elem kind // is specified to be written as 0x00 and interpreted to mean "funcref". 
const uint8_t elemKind = 0; diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 759e432125091..ede2d692a5949 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -170,7 +170,7 @@ enum : unsigned { WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02, // if passive == 0 WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 0x04, }; -const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND = 0x3; +const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC = 0x3; // Feature policy prefixes used in the custom "target_features" section enum : uint8_t { @@ -415,6 +415,10 @@ struct WasmDataSegment { uint32_t Comdat; // from the "comdat info" section }; +// 3 different element segment modes are encodable. This class is currently +// only used during decoding (see WasmElemSegment below). +enum class ElemSegmentMode { Active, Passive, Declarative }; + // Represents a Wasm element segment, with some limitations compared the spec: // 1) Does not model passive or declarative segments (Segment will end up with // an Offset field of i32.const 0) diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index 29a8c53d350a4..8ddbe929e68b9 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -1019,7 +1019,7 @@ void WasmObjectWriter::writeElemSection( encodeSLEB128(InitialTableOffset, W->OS); W->OS << char(wasm::WASM_OPCODE_END); - if (Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) { + if (Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) { // We only write active function table initializers, for which the elem kind // is specified to be written as 0x00 and interpreted to mean "funcref". 
const uint8_t ElemKind = 0; diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index 2c9b878a4cde9..0f6fd5612f9d8 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -1440,15 +1440,20 @@ Error WasmObjectFile::parseExportSection(ReadContext &Ctx) { Info.Flags = 0; switch (Ex.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: { - if (!isDefinedFunctionIndex(Ex.Index)) + if (!isValidFunctionIndex(Ex.Index)) return make_error("invalid function export", object_error::parse_failed); - getDefinedFunction(Ex.Index).ExportName = Ex.Name; Info.Kind = wasm::WASM_SYMBOL_TYPE_FUNCTION; Info.ElementIndex = Ex.Index; - unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions; - wasm::WasmFunction &Function = Functions[FuncIndex]; - Signature = &Signatures[Function.SigIndex]; + if (isDefinedFunctionIndex(Ex.Index)) { + getDefinedFunction(Ex.Index).ExportName = Ex.Name; + unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions; + wasm::WasmFunction &Function = Functions[FuncIndex]; + Signature = &Signatures[Function.SigIndex]; + } + // Else the function is imported. LLVM object files don't use this + // pattern and we still treat this as an undefined symbol, but we want to + // parse it without crashing. 
break; } case wasm::WASM_EXTERNAL_GLOBAL: { @@ -1645,17 +1650,25 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { return make_error( "Unsupported flags for element segment", object_error::parse_failed); - bool IsPassive = (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) != 0; - bool IsDeclarative = - IsPassive && (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_DECLARATIVE); + wasm::ElemSegmentMode Mode; + if ((Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) == 0) { + Mode = wasm::ElemSegmentMode::Active; + } else if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_DECLARATIVE) { + Mode = wasm::ElemSegmentMode::Declarative; + } else { + Mode = wasm::ElemSegmentMode::Passive; + } bool HasTableNumber = - !IsPassive && + Mode == wasm::ElemSegmentMode::Active && (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER); + bool HasElemKind = + (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) && + !(Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS); + bool HasElemType = + (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) && + (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS); bool HasInitExprs = (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS); - bool HasElemKind = - (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) && - !HasInitExprs; if (HasTableNumber) Segment.TableNumber = readVaruint32(Ctx); @@ -1666,7 +1679,7 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { return make_error("invalid TableNumber", object_error::parse_failed); - if (IsPassive || IsDeclarative) { + if (Mode != wasm::ElemSegmentMode::Active) { Segment.Offset.Extended = false; Segment.Offset.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST; Segment.Offset.Inst.Value.Int32 = 0; @@ -1692,7 +1705,7 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { object_error::parse_failed); Segment.ElemKind = wasm::ValType::FUNCREF; } - } else if (HasInitExprs) { + } else if (HasElemType) { auto ElemType = parseValType(Ctx, 
readVaruint32(Ctx)); Segment.ElemKind = ElemType; } else { diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp index 817d364694b43..bd016764f5862 100644 --- a/llvm/lib/ObjectYAML/WasmEmitter.cpp +++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp @@ -497,7 +497,7 @@ void WasmWriter::writeSectionContent(raw_ostream &OS, writeInitExpr(OS, Segment.Offset); - if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) { + if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) { // We only support active function table initializers, for which the elem // kind is specified to be written as 0x00 and interpreted to mean // "funcref". diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp index 0636e19e05353..6af66ba62be18 100644 --- a/llvm/lib/ObjectYAML/WasmYAML.cpp +++ b/llvm/lib/ObjectYAML/WasmYAML.cpp @@ -381,7 +381,7 @@ void MappingTraits::mapping( Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER) IO.mapOptional("TableNumber", Segment.TableNumber); if (!IO.outputting() || - Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) + Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) IO.mapOptional("ElemKind", Segment.ElemKind); // TODO: Omit "offset" for passive segments? It's neither meaningful nor // encoded. 
diff --git a/llvm/test/Object/Inputs/WASM/multi-table.wasm b/llvm/test/Object/Inputs/WASM/multi-table.wasm index 47f5d8311cb74f76485577df85578b62f896361d..81e52a2d3e28658cfdb2a488bcd1bce7fea16575 100644 GIT binary patch delta 35 ncmdnVxQ}sy6H^xVM3)pX7Dond1}=VPR)$MVQryf8j9?4^javnp delta 30 icmdnTxRY^$6H_YJM3)pnE`DZKhD%IR+{_G&U Date: Fri, 17 Jan 2025 17:55:54 -0800 Subject: [PATCH 83/88] =?UTF-8?q?Add=20option=20to=20print=20entire=20func?= =?UTF-8?q?tion=20instead=20of=20just=20the=20loops=20for=20loo=E2=80=A6?= =?UTF-8?q?=20(#123229)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit print-after-all is useful for diffing IR between two passes. When one of the two is a function pass, and the other is a loop pass, the diff becomes useless. Add an option which prints the entire function for loop passes. --- llvm/include/llvm/IR/PrintPasses.h | 3 + llvm/lib/Analysis/LoopInfo.cpp | 12 ++++ llvm/lib/IR/PrintPasses.cpp | 8 +++ llvm/test/Other/print-loop-func-scope.ll | 75 ++++++++++++++++++++++++ 4 files changed, 98 insertions(+) create mode 100644 llvm/test/Other/print-loop-func-scope.ll diff --git a/llvm/include/llvm/IR/PrintPasses.h b/llvm/include/llvm/IR/PrintPasses.h index 95b97e76c867c..0aa1b379c35cf 100644 --- a/llvm/include/llvm/IR/PrintPasses.h +++ b/llvm/include/llvm/IR/PrintPasses.h @@ -51,6 +51,9 @@ std::vector printAfterPasses(); // Returns true if we should always print the entire module. bool forcePrintModuleIR(); +// Returns true if we should print the entire function for loop passes. +bool forcePrintFuncIR(); + // Return true if -filter-passes is empty or contains the pass name. 
bool isPassInPrintList(StringRef PassName); bool isFilterPassesEmpty(); diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 6bb5f001e9bd1..7bd5e1e0cfac8 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -999,6 +999,18 @@ void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) { return; } + if (forcePrintFuncIR()) { + // handling -print-loop-func-scope. + // -print-module-scope overrides this. + OS << Banner << " (loop: "; + L.getHeader()->printAsOperand(OS, false); + OS << ")\n"; + + // printing whole function. + OS << *L.getHeader()->getParent(); + return; + } + OS << Banner; auto *PreHeader = L.getLoopPreheader(); diff --git a/llvm/lib/IR/PrintPasses.cpp b/llvm/lib/IR/PrintPasses.cpp index e2ef20bb81ba7..610411a3cf978 100644 --- a/llvm/lib/IR/PrintPasses.cpp +++ b/llvm/lib/IR/PrintPasses.cpp @@ -88,6 +88,12 @@ static cl::opt "always print a module IR"), cl::init(false), cl::Hidden); +static cl::opt LoopPrintFuncScope( + "print-loop-func-scope", + cl::desc("When printing IR for print-[before|after]{-all} " + "for a loop pass, always print function IR"), + cl::init(false), cl::Hidden); + // See the description for -print-changed for an explanation of the use // of this option. 
static cl::list FilterPasses( @@ -141,6 +147,8 @@ std::vector llvm::printAfterPasses() { bool llvm::forcePrintModuleIR() { return PrintModuleScope; } +bool llvm::forcePrintFuncIR() { return LoopPrintFuncScope; } + bool llvm::isPassInPrintList(StringRef PassName) { static std::unordered_set Set(FilterPasses.begin(), FilterPasses.end()); diff --git a/llvm/test/Other/print-loop-func-scope.ll b/llvm/test/Other/print-loop-func-scope.ll new file mode 100644 index 0000000000000..507ff70a5fd96 --- /dev/null +++ b/llvm/test/Other/print-loop-func-scope.ll @@ -0,0 +1,75 @@ +; This test documents how the IR dumped for loop passes differs with -print-loop-func-scope +; and -print-module-scope +; - Without -print-loop-func-scope, dumps only the loop, with 3 sections- preheader, +; loop, and exit blocks +; - With -print-loop-func-scope, dumps only the function which contains the loop +; - With -print-module-scope, dumps the entire module containing the loop, and disregards +; the -print-loop-func-scope flag. 
+ +; RUN: opt < %s 2>&1 -disable-output \ +; RUN: -passes=licm -print-after=licm \ +; RUN: | FileCheck %s -check-prefix=VANILLA +; RUN: opt < %s 2>&1 -disable-output \ +; RUN: -passes=licm -print-after=licm -print-loop-func-scope \ +; RUN: | FileCheck %s -check-prefix=LOOPFUNC +; RUN: opt < %s 2>&1 -disable-output \ +; RUN: -passes=licm -print-after=licm -print-module-scope \ +; RUN: | FileCheck %s -check-prefix=MODULE +; RUN: opt < %s 2>&1 -disable-output \ +; RUN: -passes=licm -print-after=licm -print-module-scope -print-loop-func-scope\ +; RUN: | FileCheck %s -check-prefix=MODULEWITHLOOP + +; VANILLA: IR Dump After LICMPass +; VANILLA-NOT: define void @foo +; VANILLA: Preheader: +; VANILLA: Loop: +; VANILLA: Exit blocks + +; LOOPFUNC: IR Dump After LICMPass +; LOOPFUNC: (loop: +; LOOPFUNC: define void @foo +; LOOPFUNC-NOT: Preheader: +; LOOPFUNC-NOT: Loop: +; LOOPFUNC-NOT: Exit blocks + +; MODULE: IR Dump After LICMPass +; MODULE: ModuleID = +; MODULE: define void @foo +; MODULE-NOT: Preheader: +; MODULE-NOT: Loop: +; MODULE-NOT: Exit blocks +; MODULE: define void @bar +; MODULE: declare void @baz(i32) + +; MODULEWITHLOOP: IR Dump After LICMPass +; MODULEWITHLOOP: ModuleID = +; MODULEWITHLOOP: define void @foo +; MODULEWITHLOOP-NOT: Preheader: +; MODULEWITHLOOP-NOT: Loop: +; MODULEWITHLOOP-NOT: Exit blocks +; MODULEWITHLOOP: define void @bar +; MODULEWITHLOOP: declare void @baz(i32) + +define void @foo(i32 %n) { +entry: + br label %loop_cond + +loop_cond: + %i = phi i32 [ 0, %entry ], [ %i_next, %loop_body ] + %cmp = icmp slt i32 %i, %n + br i1 %cmp, label %loop_body, label %loop_end + +loop_body: + call void @baz(i32 %i) + %i_next = add i32 %i, 1 + br label %loop_cond + +loop_end: + ret void +} + +define void @bar() { + ret void +} + +declare void @baz(i32) From 07d496538f5543a8eed5e207148e28e358b7cca4 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Sat, 18 Jan 2025 10:23:20 +0800 Subject: [PATCH 84/88] [SLP] Replace MainOp and AltOp in TreeEntry with 
InstructionsState. (#122443) Add TreeEntry::hasState. Add assert for getTreeEntry. Remove the OpValue parameter from the canReuseExtract function. Remove the Opcode parameter from the ComputeMaxBitWidth lambda function. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 229 +++++++++--------- .../AArch64/InstructionsState-is-invalid-0.ll | 64 +++++ .../InstructionsState-is-invalid-2.ll | 17 ++ .../X86/InstructionsState-is-invalid-1.ll | 25 ++ 4 files changed, 225 insertions(+), 110 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/InstructionsState-is-invalid-2.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/InstructionsState-is-invalid-1.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 40dd0d4cc4ef6..34d9abb4dc7a3 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2414,15 +2414,17 @@ class BoUpSLP { } /// Go through the instructions in VL and append their operands. - void appendOperandsOfVL(ArrayRef VL, Instruction *VL0) { + void appendOperandsOfVL(ArrayRef VL, const InstructionsState &S) { assert(!VL.empty() && "Bad VL"); assert((empty() || VL.size() == getNumLanes()) && "Expected same number of lanes"); + assert(S.valid() && "InstructionsState is invalid."); // IntrinsicInst::isCommutative returns true if swapping the first "two" // arguments to the intrinsic produces the same result. constexpr unsigned IntrinsicNumOperands = 2; - unsigned NumOperands = VL0->getNumOperands(); - ArgSize = isa(VL0) ? IntrinsicNumOperands : NumOperands; + Instruction *MainOp = S.getMainOp(); + unsigned NumOperands = MainOp->getNumOperands(); + ArgSize = isa(MainOp) ? 
IntrinsicNumOperands : NumOperands; OpsVec.resize(NumOperands); unsigned NumLanes = VL.size(); for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { @@ -2441,19 +2443,19 @@ class BoUpSLP { // operations or alternating sequences (e.g., +, -), we can safely // tell the inverse operations by checking commutativity. if (isa(VL[Lane])) { - if (auto *EI = dyn_cast(VL0)) { + if (auto *EI = dyn_cast(MainOp)) { if (OpIdx == 0) { OpsVec[OpIdx][Lane] = {EI->getVectorOperand(), true, false}; continue; } - } else if (auto *EV = dyn_cast(VL0)) { + } else if (auto *EV = dyn_cast(MainOp)) { if (OpIdx == 0) { OpsVec[OpIdx][Lane] = {EV->getAggregateOperand(), true, false}; continue; } } OpsVec[OpIdx][Lane] = { - PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true, + PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true, false}; continue; } @@ -2566,11 +2568,12 @@ class BoUpSLP { public: /// Initialize with all the operands of the instruction vector \p RootVL. - VLOperands(ArrayRef RootVL, Instruction *VL0, const BoUpSLP &R) + VLOperands(ArrayRef RootVL, const InstructionsState &S, + const BoUpSLP &R) : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R), - L(R.LI->getLoopFor((VL0->getParent()))) { + L(R.LI->getLoopFor(S.getMainOp()->getParent())) { // Append all the operands of RootVL. - appendOperandsOfVL(RootVL, VL0); + appendOperandsOfVL(RootVL, S); } /// \Returns a value vector with the operands across all lanes for the @@ -3043,7 +3046,7 @@ class BoUpSLP { /// non-identity permutation that allows to reuse extract instructions. /// \param ResizeAllowed indicates whether it is allowed to handle subvector /// extract order. - bool canReuseExtract(ArrayRef VL, Value *OpValue, + bool canReuseExtract(ArrayRef VL, SmallVectorImpl &CurrentOrder, bool ResizeAllowed = false) const; @@ -3270,7 +3273,7 @@ class BoUpSLP { }; /// Checks if the current node is a gather node. 
- bool isGather() const {return State == NeedToGather; } + bool isGather() const { return State == NeedToGather; } /// A vector of scalars. ValueList Scalars; @@ -3334,9 +3337,9 @@ class BoUpSLP { /// reordering of operands during buildTree_rec() and vectorizeTree(). SmallVector Operands; - /// The main/alternate instruction. - Instruction *MainOp = nullptr; - Instruction *AltOp = nullptr; + /// MainOp and AltOp are recorded inside. S should be obtained from + /// newTreeEntry. + InstructionsState S = InstructionsState::invalid(); /// Interleaving factor for interleaved loads Vectorize nodes. unsigned InterleaveFactor = 0; @@ -3360,10 +3363,10 @@ class BoUpSLP { /// Set this bundle's operand from Scalars. void setOperand(const BoUpSLP &R, bool RequireReorder = false) { - VLOperands Ops(Scalars, MainOp, R); + VLOperands Ops(Scalars, S, R); if (RequireReorder) Ops.reorder(); - for (unsigned I : seq(MainOp->getNumOperands())) + for (unsigned I : seq(S.getMainOp()->getNumOperands())) setOperand(I, Ops.getVL(I)); } @@ -3396,13 +3399,9 @@ class BoUpSLP { } /// Some of the instructions in the list have alternate opcodes. - bool isAltShuffle() const { return MainOp != AltOp; } + bool isAltShuffle() const { return S.isAltShuffle(); } - bool isOpcodeOrAlt(Instruction *I) const { - unsigned CheckedOpcode = I->getOpcode(); - return (getOpcode() == CheckedOpcode || - getAltOpcode() == CheckedOpcode); - } + bool isOpcodeOrAlt(Instruction *I) const { return S.isOpcodeOrAlt(I); } /// Chooses the correct key for scheduling data. If \p Op has the same (or /// alternate) opcode as \p OpValue, the key is \p Op. 
Otherwise the key is @@ -3411,31 +3410,24 @@ class BoUpSLP { auto *I = dyn_cast(Op); if (I && isOpcodeOrAlt(I)) return Op; - return MainOp; + return S.getMainOp(); } void setOperations(const InstructionsState &S) { assert(S && "InstructionsState is invalid."); - MainOp = S.getMainOp(); - AltOp = S.getAltOp(); + this->S = S; } - Instruction *getMainOp() const { - return MainOp; - } + Instruction *getMainOp() const { return S.getMainOp(); } - Instruction *getAltOp() const { - return AltOp; - } + Instruction *getAltOp() const { return S.getAltOp(); } /// The main/alternate opcodes for the list of instructions. - unsigned getOpcode() const { - return MainOp ? MainOp->getOpcode() : 0; - } + unsigned getOpcode() const { return S.getOpcode(); } - unsigned getAltOpcode() const { - return AltOp ? AltOp->getOpcode() : 0; - } + unsigned getAltOpcode() const { return S.getAltOpcode(); } + + bool hasState() const { return S.valid(); } /// When ReuseReorderShuffleIndices is empty it just returns position of \p /// V within vector of Scalars. Otherwise, try to remap on its reuse index. 
@@ -3531,16 +3523,13 @@ class BoUpSLP { dbgs() << "CombinedVectorize\n"; break; } - dbgs() << "MainOp: "; - if (MainOp) - dbgs() << *MainOp << "\n"; - else - dbgs() << "NULL\n"; - dbgs() << "AltOp: "; - if (AltOp) - dbgs() << *AltOp << "\n"; - else - dbgs() << "NULL\n"; + if (S) { + dbgs() << "MainOp: " << *S.getMainOp() << "\n"; + dbgs() << "AltOp: " << *S.getAltOp() << "\n"; + } else { + dbgs() << "MainOp: NULL\n"; + dbgs() << "AltOp: NULL\n"; + } dbgs() << "VectorizedValue: "; if (VectorizedValue) dbgs() << *VectorizedValue << "\n"; @@ -3715,9 +3704,13 @@ class BoUpSLP { } #endif - TreeEntry *getTreeEntry(Value *V) { return ScalarToTreeEntry.lookup(V); } + TreeEntry *getTreeEntry(Value *V) { + assert(V && "V cannot be nullptr."); + return ScalarToTreeEntry.lookup(V); + } const TreeEntry *getTreeEntry(Value *V) const { + assert(V && "V cannot be nullptr."); return ScalarToTreeEntry.lookup(V); } @@ -5615,7 +5608,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // Try build correct order for extractelement instructions. SmallVector ReusedMask(TE.ReuseShuffleIndices.begin(), TE.ReuseShuffleIndices.end()); - if (TE.getOpcode() == Instruction::ExtractElement && + if (TE.hasState() && TE.getOpcode() == Instruction::ExtractElement && all_of(TE.Scalars, [Sz](Value *V) { if (isa(V)) return true; @@ -5777,10 +5770,11 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { return std::nullopt; // No need to reorder. return std::move(Phis); } - if (TE.isGather() && !TE.isAltShuffle() && allSameType(TE.Scalars)) { + if (TE.isGather() && (!TE.hasState() || !TE.isAltShuffle()) && + allSameType(TE.Scalars)) { // TODO: add analysis of other gather nodes with extractelement // instructions and other values/instructions, not only undefs. 
- if ((TE.getOpcode() == Instruction::ExtractElement || + if (((TE.hasState() && TE.getOpcode() == Instruction::ExtractElement) || (all_of(TE.Scalars, IsaPred) && any_of(TE.Scalars, IsaPred))) && all_of(TE.Scalars, [](Value *V) { @@ -5790,8 +5784,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // Check that gather of extractelements can be represented as // just a shuffle of a single vector. OrdersType CurrentOrder; - bool Reuse = canReuseExtract(TE.Scalars, TE.getMainOp(), CurrentOrder, - /*ResizeAllowed=*/true); + bool Reuse = + canReuseExtract(TE.Scalars, CurrentOrder, /*ResizeAllowed=*/true); if (Reuse || !CurrentOrder.empty()) return std::move(CurrentOrder); } @@ -5840,7 +5834,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { return Order; // Check if can include the order of vectorized loads. For masked gathers do // extra analysis later, so include such nodes into a special list. - if (TE.isGather() && TE.getOpcode() == Instruction::Load) { + if (TE.hasState() && TE.getOpcode() == Instruction::Load) { SmallVector PointerOps; OrdersType CurrentOrder; LoadsState Res = canVectorizeLoads(TE.Scalars, TE.Scalars.front(), @@ -5955,7 +5949,7 @@ void BoUpSLP::reorderTopToBottom() { // Patterns like [fadd,fsub] can be combined into a single instruction in // x86. Reordering them into [fsub,fadd] blocks this pattern. So we need // to take into account their order when looking for the most used order. 
- if (TE->isAltShuffle()) { + if (TE->hasState() && TE->isAltShuffle()) { VectorType *VecTy = getWidenedType(TE->Scalars[0]->getType(), TE->Scalars.size()); unsigned Opcode0 = TE->getOpcode(); @@ -6034,7 +6028,7 @@ void BoUpSLP::reorderTopToBottom() { if (It != GathersToOrders.end()) return It->second; } - if (OpTE->isAltShuffle()) { + if (OpTE->hasState() && OpTE->isAltShuffle()) { auto It = AltShufflesToOrders.find(OpTE); if (It != AltShufflesToOrders.end()) return It->second; @@ -7637,7 +7631,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( } case Instruction::ExtractValue: case Instruction::ExtractElement: { - bool Reuse = canReuseExtract(VL, VL0, CurrentOrder); + bool Reuse = canReuseExtract(VL, CurrentOrder); // FIXME: Vectorizing is not supported yet for non-power-of-2 ops. if (!has_single_bit(VL.size())) return TreeEntry::NeedToGather; @@ -8657,7 +8651,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, TE->dump()); ValueList Left, Right; - VLOperands Ops(VL, VL0, *this); + VLOperands Ops(VL, S, *this); if (cast(VL0)->isCommutative()) { // Commutative predicate - collect + sort operands of the instructions // so that each side is more likely to have the same opcode. @@ -8925,7 +8919,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const { return N; } -bool BoUpSLP::canReuseExtract(ArrayRef VL, Value *OpValue, +bool BoUpSLP::canReuseExtract(ArrayRef VL, SmallVectorImpl &CurrentOrder, bool ResizeAllowed) const { const auto *It = find_if(VL, IsaPred); @@ -9579,7 +9573,7 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) { // Do not reorder nodes if it small (just 2 elements), all-constant or all // instructions have same opcode already. 
- if (TE.Scalars.size() == 2 || (TE.getOpcode() && !TE.isAltShuffle()) || + if (TE.Scalars.size() == 2 || (TE.hasState() && !TE.isAltShuffle()) || all_of(TE.Scalars, isConstant)) return; @@ -9798,7 +9792,7 @@ void BoUpSLP::transformNodes() { // Do not try partial vectorization for small nodes (<= 2), nodes with the // same opcode and same parent block or all constants. if (VL.size() <= 2 || LoadEntriesToVectorize.contains(Idx) || - !(!E.getOpcode() || E.getOpcode() == Instruction::Load || + !(!E.hasState() || E.getOpcode() == Instruction::Load || E.isAltShuffle() || !allSameBlock(VL)) || allConstant(VL) || isSplat(VL)) continue; @@ -9921,6 +9915,7 @@ void BoUpSLP::transformNodes() { buildTree_rec(Slice, 0, EdgeInfo(&E, UINT_MAX)); if (PrevSize + 1 == VectorizableTree.size() && VectorizableTree[PrevSize]->isGather() && + VectorizableTree[PrevSize]->hasState() && VectorizableTree[PrevSize]->getOpcode() != Instruction::ExtractElement && !isSplat(Slice)) { @@ -9941,6 +9936,8 @@ void BoUpSLP::transformNodes() { E.ReorderIndices.clear(); } } + if (!E.hasState()) + continue; switch (E.getOpcode()) { case Instruction::Load: { // No need to reorder masked gather loads, just reorder the scalar @@ -10044,7 +10041,7 @@ void BoUpSLP::transformNodes() { if (LoadEntriesToVectorize.empty()) { // Single load node - exit. - if (VectorizableTree.size() <= 1 && + if (VectorizableTree.size() <= 1 && VectorizableTree.front()->hasState() && VectorizableTree.front()->getOpcode() == Instruction::Load) return; // Small graph with small VF - exit. 
@@ -10060,7 +10057,7 @@ void BoUpSLP::transformNodes() { getCanonicalGraphSize() <= SmallTree && count_if(ArrayRef(VectorizableTree).drop_front(getCanonicalGraphSize()), [](const std::unique_ptr &TE) { - return TE->isGather() && + return TE->isGather() && TE->hasState() && TE->getOpcode() == Instruction::Load && !allSameBlock(TE->Scalars); }) == 1) @@ -10076,13 +10073,13 @@ void BoUpSLP::transformNodes() { for (std::unique_ptr &TE : VectorizableTree) { TreeEntry &E = *TE; if (E.isGather() && - (E.getOpcode() == Instruction::Load || - (!E.getOpcode() && any_of(E.Scalars, - [&](Value *V) { - return isa(V) && - !isVectorized(V) && - !isDeleted(cast(V)); - }))) && + ((E.hasState() && E.getOpcode() == Instruction::Load) || + (!E.hasState() && any_of(E.Scalars, + [&](Value *V) { + return isa(V) && + !isVectorized(V) && + !isDeleted(cast(V)); + }))) && !isSplat(E.Scalars)) { for (Value *V : E.Scalars) { auto *LI = dyn_cast(V); @@ -10676,7 +10673,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { bool PrevNodeFound = any_of( ArrayRef(R.VectorizableTree).take_front(E->Idx), [&](const std::unique_ptr &TE) { - return ((!TE->isAltShuffle() && + return ((TE->hasState() && !TE->isAltShuffle() && TE->getOpcode() == Instruction::ExtractElement) || TE->isGather()) && all_of(enumerate(TE->Scalars), [&](auto &&Data) { @@ -11801,7 +11798,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, for (const std::unique_ptr &TE : VectorizableTree) { if (TE.get() == E) break; - if (TE->isAltShuffle() && + if (TE->hasState() && TE->isAltShuffle() && ((TE->getOpcode() == E->getOpcode() && TE->getAltOpcode() == E->getAltOpcode()) || (TE->getOpcode() == E->getAltOpcode() && @@ -11963,10 +11960,12 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const { [this](Value *V) { return EphValues.contains(V); }) && (allConstant(TE->Scalars) || isSplat(TE->Scalars) || TE->Scalars.size() < Limit || - ((TE->getOpcode() == Instruction::ExtractElement || + 
(((TE->hasState() && + TE->getOpcode() == Instruction::ExtractElement) || all_of(TE->Scalars, IsaPred)) && isFixedVectorShuffle(TE->Scalars, Mask, AC)) || - (TE->getOpcode() == Instruction::Load && !TE->isAltShuffle()) || + (TE->hasState() && TE->getOpcode() == Instruction::Load && + !TE->isAltShuffle()) || any_of(TE->Scalars, IsaPred)); }; @@ -12095,9 +12094,10 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { !VectorizableTree.empty() && all_of(VectorizableTree, [&](const std::unique_ptr &TE) { return (TE->isGather() && - TE->getOpcode() != Instruction::ExtractElement && + (!TE->hasState() || + TE->getOpcode() != Instruction::ExtractElement) && count_if(TE->Scalars, IsaPred) <= Limit) || - TE->getOpcode() == Instruction::PHI; + (TE->hasState() && TE->getOpcode() == Instruction::PHI); })) return true; @@ -12115,7 +12115,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { // somewhere. bool IsAllowedSingleBVNode = VectorizableTree.size() > 1 || - (VectorizableTree.size() == 1 && VectorizableTree.front()->getOpcode() && + (VectorizableTree.size() == 1 && VectorizableTree.front()->hasState() && !VectorizableTree.front()->isAltShuffle() && VectorizableTree.front()->getOpcode() != Instruction::PHI && VectorizableTree.front()->getOpcode() != Instruction::GetElementPtr && @@ -12131,6 +12131,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { return false; if (VectorizableTree.back()->isGather() && + VectorizableTree.back()->hasState() && VectorizableTree.back()->isAltShuffle() && VectorizableTree.back()->getVectorFactor() > 2 && allSameBlock(VectorizableTree.back()->Scalars) && @@ -12155,7 +12156,7 @@ bool BoUpSLP::isTreeNotExtendable() const { getCanonicalGraphSize() <= SmallTree && count_if(ArrayRef(VectorizableTree).drop_front(getCanonicalGraphSize()), [](const std::unique_ptr &TE) { - return TE->isGather() && + return TE->isGather() && TE->hasState() && TE->getOpcode() == 
Instruction::Load && !allSameBlock(TE->Scalars); }) == 1) @@ -12167,7 +12168,7 @@ bool BoUpSLP::isTreeNotExtendable() const { TreeEntry &E = *VectorizableTree[Idx]; if (!E.isGather()) continue; - if (E.getOpcode() && E.getOpcode() != Instruction::Load) + if (E.hasState() && E.getOpcode() != Instruction::Load) return false; if (isSplat(E.Scalars) || allConstant(E.Scalars)) continue; @@ -12477,7 +12478,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { TE.dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n"); continue; } - if (TE.isGather()) { + if (TE.isGather() && TE.hasState()) { if (const TreeEntry *E = getTreeEntry(TE.getMainOp()); E && E->getVectorFactor() == TE.getVectorFactor() && E->isSame(TE.Scalars)) { @@ -13626,9 +13627,11 @@ BoUpSLP::isGatherShuffledEntry( if (!TE->UserTreeIndices.empty() && TE->UserTreeIndices.front().UserTE->isGather() && TE->UserTreeIndices.front().EdgeIdx == UINT_MAX) { - assert((TE->Idx == 0 || TE->getOpcode() == Instruction::ExtractElement || - isSplat(TE->Scalars)) && - "Expected splat or extractelements only node."); + assert( + (TE->Idx == 0 || + (TE->hasState() && TE->getOpcode() == Instruction::ExtractElement) || + isSplat(TE->Scalars)) && + "Expected splat or extractelements only node."); return {}; } unsigned SliceSize = getPartNumElems(VL.size(), NumParts); @@ -14921,14 +14924,15 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, } } // Gather extracts after we check for full matched gathers only. 
- if (!ExtractShuffles.empty() || E->getOpcode() != Instruction::Load || - ((E->getOpcode() == Instruction::Load || + if (!ExtractShuffles.empty() || !E->hasState() || + E->getOpcode() != Instruction::Load || + (((E->hasState() && E->getOpcode() == Instruction::Load) || any_of(E->Scalars, IsaPred)) && any_of(E->Scalars, [this](Value *V) { return isa(V) && getTreeEntry(V); })) || - E->isAltShuffle() || + (E->hasState() && E->isAltShuffle()) || all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || isSplat(E->Scalars) || (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) { @@ -15308,7 +15312,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size()); if (E->isGather()) { // Set insert point for non-reduction initial nodes. - if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList) + if (E->hasState() && E->Idx == 0 && !UserIgnoreList) setInsertPointAfterBundle(E); Value *Vec = createBuildVector(E, ScalarTy, PostponedPHIs); E->VectorizedValue = Vec; @@ -18153,8 +18157,9 @@ static RecurKind getRdxKind(Value *V); void BoUpSLP::computeMinimumValueSizes() { // We only attempt to truncate integer expressions. 
bool IsStoreOrInsertElt = - VectorizableTree.front()->getOpcode() == Instruction::Store || - VectorizableTree.front()->getOpcode() == Instruction::InsertElement; + VectorizableTree.front()->hasState() && + (VectorizableTree.front()->getOpcode() == Instruction::Store || + VectorizableTree.front()->getOpcode() == Instruction::InsertElement); if ((IsStoreOrInsertElt || UserIgnoreList) && ExtraBitWidthNodes.size() <= 1 && (!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 || @@ -18195,10 +18200,9 @@ void BoUpSLP::computeMinimumValueSizes() { return; SmallVector ToDemote; - auto ComputeMaxBitWidth = [&](const TreeEntry &E, bool IsTopRoot, - bool IsProfitableToDemoteRoot, unsigned Opcode, - unsigned Limit, bool IsTruncRoot, - bool IsSignedCmp) -> unsigned { + auto ComputeMaxBitWidth = + [&](const TreeEntry &E, bool IsTopRoot, bool IsProfitableToDemoteRoot, + unsigned Limit, bool IsTruncRoot, bool IsSignedCmp) -> unsigned { ToDemote.clear(); // Check if the root is trunc and the next node is gather/buildvector, then // keep trunc in scalars, which is free in most cases. 
@@ -18239,11 +18243,14 @@ void BoUpSLP::computeMinimumValueSizes() { return MaxBitWidth; } + if (!E.hasState()) + return 0u; + unsigned VF = E.getVectorFactor(); Type *ScalarTy = E.Scalars.front()->getType(); unsigned ScalarTyNumElements = getNumElements(ScalarTy); auto *TreeRootIT = dyn_cast(ScalarTy->getScalarType()); - if (!TreeRootIT || !Opcode) + if (!TreeRootIT) return 0u; if (any_of(E.Scalars, @@ -18315,6 +18322,7 @@ void BoUpSLP::computeMinimumValueSizes() { IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF))) return 0u; + unsigned Opcode = E.getOpcode(); bool IsProfitableToDemote = Opcode == Instruction::Trunc || Opcode == Instruction::SExt || Opcode == Instruction::ZExt || NumParts > 1; @@ -18395,15 +18403,14 @@ void BoUpSLP::computeMinimumValueSizes() { while (NodeIdx < VectorizableTree.size()) { ArrayRef TreeRoot = VectorizableTree[NodeIdx]->Scalars; unsigned Limit = 2; - unsigned Opcode = VectorizableTree[NodeIdx]->getOpcode(); if (IsTopRoot && ReductionBitWidth == DL->getTypeSizeInBits( VectorizableTree.front()->Scalars.front()->getType())) Limit = 3; unsigned MaxBitWidth = ComputeMaxBitWidth( - *VectorizableTree[NodeIdx], IsTopRoot, IsProfitableToDemoteRoot, Opcode, - Limit, IsTruncRoot, IsSignedCmp); + *VectorizableTree[NodeIdx], IsTopRoot, IsProfitableToDemoteRoot, Limit, + IsTruncRoot, IsSignedCmp); if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) { if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth) ReductionBitWidth = bit_ceil(MaxBitWidth); @@ -18446,19 +18453,21 @@ void BoUpSLP::computeMinimumValueSizes() { }); IsSignedCmp = NodeIdx < VectorizableTree.size() && - any_of(VectorizableTree[NodeIdx]->UserTreeIndices, - [&](const EdgeInfo &EI) { - return EI.UserTE->getOpcode() == Instruction::ICmp && - any_of(EI.UserTE->Scalars, [&](Value *V) { - auto *IC = dyn_cast(V); - return IC && - (IC->isSigned() || - !isKnownNonNegative(IC->getOperand(0), - SimplifyQuery(*DL)) || - !isKnownNonNegative(IC->getOperand(1), - 
SimplifyQuery(*DL))); - }); - }); + any_of( + VectorizableTree[NodeIdx]->UserTreeIndices, + [&](const EdgeInfo &EI) { + return (EI.UserTE->hasState() && + EI.UserTE->getOpcode() == Instruction::ICmp) && + any_of(EI.UserTE->Scalars, [&](Value *V) { + auto *IC = dyn_cast(V); + return IC && + (IC->isSigned() || + !isKnownNonNegative(IC->getOperand(0), + SimplifyQuery(*DL)) || + !isKnownNonNegative(IC->getOperand(1), + SimplifyQuery(*DL))); + }); + }); } // If the maximum bit width we compute is less than the width of the roots' diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll new file mode 100644 index 0000000000000..ffb8f44363249 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -S %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "aarch64-unknown-linux-gnu" + +define void @foo(ptr %0) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: vector.scevcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP0:%.*]], i64 4 +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr null, i64 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[SCEVGEP]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <4 x ptr> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[SCEVGEP3]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = 
shufflevector <4 x ptr> [[TMP7]], <4 x ptr> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <4 x ptr> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP11]], [[TMP12]] +; CHECK-NEXT: br i1 [[OP_RDX]], label [[DOTLR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: ret void +; CHECK: .lr.ph: +; CHECK-NEXT: ret void +; +vector.scevcheck: + %scevgep = getelementptr i8, ptr %0, i64 4 + %scevgep3 = getelementptr i8, ptr null, i64 4 + %bound011 = icmp ult ptr %scevgep, null + %found.conflict13 = and i1 %bound011, false + %bound014 = icmp ult ptr %scevgep, null + %found.conflict16 = and i1 %bound014, false + %conflict.rdx17 = or i1 %found.conflict13, %found.conflict16 + %bound018 = icmp ult ptr %scevgep, null + %found.conflict20 = and i1 %bound018, false + %conflict.rdx21 = or i1 %conflict.rdx17, %found.conflict20 + %bound022 = icmp ult ptr %0, null + %found.conflict24 = and i1 %bound022, false + %conflict.rdx25 = or i1 %conflict.rdx21, %found.conflict24 + %bound026 = icmp ult ptr %0, null + %found.conflict28 = and i1 %bound026, false + %conflict.rdx29 = or i1 %conflict.rdx25, %found.conflict28 + %bound030 = icmp ult ptr %0, null + %found.conflict32 = and i1 %bound030, false + %conflict.rdx33 = or i1 %conflict.rdx29, %found.conflict32 + %bound034 = icmp ult ptr %0, null + %found.conflict36 = and i1 %bound034, false + %conflict.rdx37 = or i1 %conflict.rdx33, %found.conflict36 + %bound038 = icmp ult ptr %scevgep3, null + %found.conflict40 = and i1 %bound038, false + %conflict.rdx41 = or i1 %conflict.rdx37, %found.conflict40 + br i1 %conflict.rdx41, label %.lr.ph, label %vector.ph + +vector.ph: ; preds = %vector.scevcheck + ret void + +.lr.ph: ; preds = %vector.scevcheck + ret void +} diff 
--git a/llvm/test/Transforms/SLPVectorizer/InstructionsState-is-invalid-2.ll b/llvm/test/Transforms/SLPVectorizer/InstructionsState-is-invalid-2.ll new file mode 100644 index 0000000000000..445fd81bb234d --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/InstructionsState-is-invalid-2.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -S %s | FileCheck %s + +define i32 @test(i32 %minlib) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MUL2_I306:%.*]] = mul i32 [[MINLIB:%.*]], [[MINLIB]] +; CHECK-NEXT: [[MUL3_I307:%.*]] = mul i32 [[MUL2_I306]], [[MINLIB]] +; CHECK-NEXT: [[CMP183:%.*]] = icmp sgt i32 [[MUL3_I307]], 0 +; CHECK-NEXT: ret i32 0 +; +entry: + %mul2.i306 = mul i32 %minlib, %minlib + %mul3.i307 = mul i32 %mul2.i306, %minlib + %cmp183 = icmp sgt i32 %mul3.i307, 0 + ret i32 0 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/InstructionsState-is-invalid-1.ll b/llvm/test/Transforms/SLPVectorizer/X86/InstructionsState-is-invalid-1.ll new file mode 100644 index 0000000000000..cade023300063 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/InstructionsState-is-invalid-1.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -S %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @_Z4blurN6Halide5Tools5ImageItEE(i1 %0, i1 %1, i1 %ident.check, i1 %ident.check56) { +; CHECK-LABEL: @_Z4blurN6Halide5Tools5ImageItEE( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0:%.*]], [[TMP1:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[IDENT_CHECK:%.*]], [[IDENT_CHECK56:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY6_US_I_I:%.*]], label [[FOR_BODY6_US_I_I]] +; CHECK: for.body6.us.i.i: +; 
CHECK-NEXT: ret void +; +entry: + %2 = or i1 %0, %1 + %3 = or i1 %ident.check, %ident.check56 + %4 = or i1 %3, %2 + br i1 %4, label %for.body6.us.i.i, label %for.body6.us.i.i + +for.body6.us.i.i: ; preds = %entry, %entry + ret void +} From 143c33c6dfd68f4e61d8e75c512bfdff02a7c687 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 17 Jan 2025 19:13:04 -0800 Subject: [PATCH 85/88] [RISCV] Consider only legally typed splats to be legal shuffles (#123415) Given the comment, I'd expected test coverage. There was none so let's do the simple thing which benefits the one thing we have tests for. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +- .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 112 ++++++------------ 2 files changed, 37 insertions(+), 83 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 33d67c9c407d8..f4f511a7368f8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5733,14 +5733,14 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, } bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { - // Support splats for any type. These should type legalize well. - if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) - return true; - // Only support legal VTs for other shuffles for now. if (!isTypeLegal(VT)) return false; + // Support splats for any type. These should type legalize well. + if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) + return true; + MVT SVT = VT.getSimpleVT(); // Not for i1 vectors. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index df1c803ca8850..8b26c58d5bee1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -1287,37 +1287,17 @@ define void @shuffle_i64_splat(ptr %p) nounwind { } define void @shuffle_i128_splat(ptr %p) nounwind { -; RV32-LABEL: shuffle_i128_splat: -; RV32: # %bb.0: -; RV32-NEXT: lw a1, 0(a0) -; RV32-NEXT: lw a2, 4(a0) -; RV32-NEXT: lw a3, 8(a0) -; RV32-NEXT: lw a4, 12(a0) -; RV32-NEXT: sw a1, 48(a0) -; RV32-NEXT: sw a2, 52(a0) -; RV32-NEXT: sw a3, 56(a0) -; RV32-NEXT: sw a4, 60(a0) -; RV32-NEXT: sw a1, 16(a0) -; RV32-NEXT: sw a2, 20(a0) -; RV32-NEXT: sw a3, 24(a0) -; RV32-NEXT: sw a4, 28(a0) -; RV32-NEXT: sw a1, 32(a0) -; RV32-NEXT: sw a2, 36(a0) -; RV32-NEXT: sw a3, 40(a0) -; RV32-NEXT: sw a4, 44(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: shuffle_i128_splat: -; RV64: # %bb.0: -; RV64-NEXT: ld a1, 0(a0) -; RV64-NEXT: ld a2, 8(a0) -; RV64-NEXT: sd a1, 48(a0) -; RV64-NEXT: sd a2, 56(a0) -; RV64-NEXT: sd a1, 16(a0) -; RV64-NEXT: sd a2, 24(a0) -; RV64-NEXT: sd a1, 32(a0) -; RV64-NEXT: sd a2, 40(a0) -; RV64-NEXT: ret +; CHECK-LABEL: shuffle_i128_splat: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: lui a1, 16 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v16, v8, v12 +; CHECK-NEXT: vse64.v v16, (a0) +; CHECK-NEXT: ret %a = load <4 x i128>, ptr %p %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> store <4 x i128> %res, ptr %p @@ -1327,58 +1307,32 @@ define void @shuffle_i128_splat(ptr %p) nounwind { define void @shuffle_i256_splat(ptr %p) nounwind { ; RV32-LABEL: shuffle_i256_splat: ; RV32: # %bb.0: -; RV32-NEXT: lw a1, 0(a0) -; RV32-NEXT: lw a2, 4(a0) -; RV32-NEXT: lw 
a3, 8(a0) -; RV32-NEXT: lw a4, 12(a0) -; RV32-NEXT: lw a5, 16(a0) -; RV32-NEXT: lw a6, 20(a0) -; RV32-NEXT: lw a7, 24(a0) -; RV32-NEXT: lw t0, 28(a0) -; RV32-NEXT: sw a5, 112(a0) -; RV32-NEXT: sw a6, 116(a0) -; RV32-NEXT: sw a7, 120(a0) -; RV32-NEXT: sw t0, 124(a0) -; RV32-NEXT: sw a1, 96(a0) -; RV32-NEXT: sw a2, 100(a0) -; RV32-NEXT: sw a3, 104(a0) -; RV32-NEXT: sw a4, 108(a0) -; RV32-NEXT: sw a5, 80(a0) -; RV32-NEXT: sw a6, 84(a0) -; RV32-NEXT: sw a7, 88(a0) -; RV32-NEXT: sw t0, 92(a0) -; RV32-NEXT: sw a1, 64(a0) -; RV32-NEXT: sw a2, 68(a0) -; RV32-NEXT: sw a3, 72(a0) -; RV32-NEXT: sw a4, 76(a0) -; RV32-NEXT: sw a5, 48(a0) -; RV32-NEXT: sw a6, 52(a0) -; RV32-NEXT: sw a7, 56(a0) -; RV32-NEXT: sw t0, 60(a0) -; RV32-NEXT: sw a1, 32(a0) -; RV32-NEXT: sw a2, 36(a0) -; RV32-NEXT: sw a3, 40(a0) -; RV32-NEXT: sw a4, 44(a0) +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: lui a1, 12320 +; RV32-NEXT: addi a1, a1, 256 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV32-NEXT: vsext.vf2 v18, v16 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vrgatherei16.vv v24, v8, v18 +; RV32-NEXT: vse64.v v24, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: shuffle_i256_splat: ; RV64: # %bb.0: -; RV64-NEXT: ld a1, 0(a0) -; RV64-NEXT: ld a2, 8(a0) -; RV64-NEXT: ld a3, 16(a0) -; RV64-NEXT: ld a4, 24(a0) -; RV64-NEXT: sd a1, 96(a0) -; RV64-NEXT: sd a2, 104(a0) -; RV64-NEXT: sd a3, 112(a0) -; RV64-NEXT: sd a4, 120(a0) -; RV64-NEXT: sd a1, 32(a0) -; RV64-NEXT: sd a2, 40(a0) -; RV64-NEXT: sd a3, 48(a0) -; RV64-NEXT: sd a4, 56(a0) -; RV64-NEXT: sd a1, 64(a0) -; RV64-NEXT: sd a2, 72(a0) -; RV64-NEXT: sd a3, 80(a0) -; RV64-NEXT: sd a4, 88(a0) +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: lui a1, 98305 +; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: addi a1, a1, 1 +; RV64-NEXT: slli a1, a1, 16 +; RV64-NEXT: vsetivli zero, 4, 
e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v16, a1 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v24, v8, v16 +; RV64-NEXT: vse64.v v24, (a0) ; RV64-NEXT: ret %a = load <4 x i256>, ptr %p %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> From a7bca1861bfcd1490319115c1027166e27f4ae27 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Fri, 17 Jan 2025 19:26:00 -0800 Subject: [PATCH 86/88] [clang-format] Correctly annotate braces in macro definitions (#123279) Fixes #123179. --- clang/lib/Format/UnwrappedLineParser.cpp | 6 +++--- clang/unittests/Format/FormatTest.cpp | 11 ----------- clang/unittests/Format/TokenAnnotatorTest.cpp | 19 ++++++++++++++++--- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 317717241c17c..198c05fd9dcd8 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -503,14 +503,14 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { auto *NextTok = Tokens->getNextNonComment(); if (!Line->InMacroBody && !Style.isTableGen()) { - // Skip PPDirective lines and comments. + // Skip PPDirective lines (except macro definitions) and comments. 
while (NextTok->is(tok::hash)) { NextTok = Tokens->getNextToken(); - if (NextTok->is(tok::pp_not_keyword)) + if (NextTok->isOneOf(tok::pp_not_keyword, tok::pp_define)) break; do { NextTok = Tokens->getNextToken(); - } while (!NextTok->HasUnescapedNewline && NextTok->isNot(tok::eof)); + } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); while (NextTok->is(tok::comment)) NextTok = Tokens->getNextToken(); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 4d48bcacddead..d3c97319abb94 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -5732,23 +5732,12 @@ TEST_F(FormatTest, HashInMacroDefinition) { verifyFormat("#define A void # ## #", getLLVMStyleWithColumns(22)); -#if 0 - // FIXME: The correct format is: verifyFormat("{\n" " {\n" "#define GEN_ID(_x) char *_x{#_x}\n" " GEN_ID(one);\n" " }\n" "}"); -#endif - verifyFormat("{\n" - " {\n" - "#define GEN_ID(_x) \\\n" - " char *_x { #_x }\n" - " GEN_ID(one);\n" - " }\n" - "}", - getGoogleStyle()); } TEST_F(FormatTest, RespectWhitespaceInMacroDefinitions) { diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 399502db52cbf..9ac60ce73750b 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -3413,14 +3413,27 @@ TEST_F(TokenAnnotatorTest, BraceKind) { EXPECT_BRACE_KIND(Tokens[0], BK_Block); EXPECT_TOKEN(Tokens[1], tok::l_brace, TT_BlockLBrace); EXPECT_BRACE_KIND(Tokens[1], BK_Block); -#if 0 - // FIXME: EXPECT_BRACE_KIND(Tokens[11], BK_BracedInit); EXPECT_BRACE_KIND(Tokens[14], BK_BracedInit); -#endif EXPECT_BRACE_KIND(Tokens[20], BK_Block); EXPECT_BRACE_KIND(Tokens[21], BK_Block); + Tokens = annotate("{\n" + "#define FOO \\\n" + " { \\\n" + " case bar: { \\\n" + " break; \\\n" + " } \\\n" + " }\n" + "}"); + ASSERT_EQ(Tokens.size(), 15u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::l_brace, 
TT_BlockLBrace); + EXPECT_BRACE_KIND(Tokens[4], BK_Block); + EXPECT_TOKEN(Tokens[7], tok::colon, TT_CaseLabelColon); + EXPECT_BRACE_KIND(Tokens[8], BK_Block); + EXPECT_BRACE_KIND(Tokens[11], BK_Block); + EXPECT_BRACE_KIND(Tokens[12], BK_Block); + Tokens = annotate("a = class extends goog.a {};", getGoogleStyle(FormatStyle::LK_JavaScript)); ASSERT_EQ(Tokens.size(), 11u) << Tokens; From b62e55803c52ca04093a0eea361407e849dc23e1 Mon Sep 17 00:00:00 2001 From: Wanyi Date: Fri, 17 Jan 2025 22:27:34 -0500 Subject: [PATCH 87/88] [lldb][test] Remove compiler version check and use regex (#123393) The test checks specific compiler version to determine the output. However, the compiler version string is always set to 15.0.0 for our local build. Remove this check and use regex match instead. ## Test Plan ``` ./bin/llvm-lit -sva /home/wanyi/llvm-sand/external/llvm-project/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py ... Skipping the following test categories: ['dsym', 'gmodules', 'debugserver', 'objc'] -- Command Output (stderr): -- UNSUPPORTED: LLDB (/home/wanyi/llvm-sand/build/Release+Distribution/fbcode-x86_64/toolchain/bin/clang-x86_64) :: test_dsym (TestVectorOfVectorsFromStdModule.TestVectorOfVectors) (test case does not fall in any category of interest for this run) PASS: LLDB (/home/wanyi/llvm-sand/build/Release+Distribution/fbcode-x86_64/toolchain/bin/clang-x86_64) :: test_dwarf (TestVectorOfVectorsFromStdModule.TestVectorOfVectors) PASS: LLDB (/home/wanyi/llvm-sand/build/Release+Distribution/fbcode-x86_64/toolchain/bin/clang-x86_64) :: test_dwo (TestVectorOfVectorsFromStdModule.TestVectorOfVectors) ---------------------------------------------------------------------- Ran 3 tests in 4.636s OK (skipped=1) -- ******************** Testing Time: 4.97s Total Discovered Tests: 1 Passed: 1 (100.00%) ``` --- .../TestDbgInfoContentVectorFromStdModule.py | 22 ++++----- .../TestVectorOfVectorsFromStdModule.py | 46 
++++++------------- 2 files changed, 23 insertions(+), 45 deletions(-) diff --git a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py index 1c32222e64f14..759077302bfca 100644 --- a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py @@ -23,13 +23,6 @@ def test(self): self.runCmd("settings set target.import-std-module true") - if self.expectedCompiler(["clang"]) and self.expectedCompilerVersion( - [">", "16.0"] - ): - vector_type = "std::vector" - else: - vector_type = "std::vector >" - size_type = "size_type" value_type = "value_type" iterator = "iterator" @@ -41,13 +34,14 @@ def test(self): ValueCheck(name="current"), ] - self.expect_expr( - "a", - result_type=vector_type, - result_children=[ - ValueCheck(children=[ValueCheck(value="3")]), - ValueCheck(children=[ValueCheck(value="1")]), - ValueCheck(children=[ValueCheck(value="2")]), + self.expect( + "expr a", + patterns=[ + """\(std::vector )*>\) \$0 = size=3 \{ + \[0\] = \(a = 3\) + \[1\] = \(a = 1\) + \[2\] = \(a = 2\) +\}""" ], ) diff --git a/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py index a1f33271f39d2..e18785ec1359c 100644 --- a/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py @@ -17,42 +17,26 @@ def test(self): self, "// Set break point at this line.", lldb.SBFileSpec("main.cpp") ) - if 
self.expectedCompiler(["clang"]) and self.expectedCompilerVersion( - [">", "16.0"] - ): - vector_type = "std::vector" - vector_of_vector_type = "std::vector >" - else: - vector_type = "std::vector" - vector_of_vector_type = ( - "std::vector, std::allocator > >" - ) - size_type = "size_type" value_type = "value_type" self.runCmd("settings set target.import-std-module true") - self.expect_expr( - "a", - result_type=vector_of_vector_type, - result_children=[ - ValueCheck( - type=vector_type, - children=[ - ValueCheck(value="1"), - ValueCheck(value="2"), - ValueCheck(value="3"), - ], - ), - ValueCheck( - type=vector_type, - children=[ - ValueCheck(value="3"), - ValueCheck(value="2"), - ValueCheck(value="1"), - ], - ), + self.expect( + "expr a", + patterns=[ + """\(std::vector(, std::allocator )* >\) \$0 = size=2 \{ + \[0\] = size=3 \{ + \[0\] = 1 + \[1\] = 2 + \[2\] = 3 + \} + \[1\] = size=3 \{ + \[0\] = 3 + \[1\] = 2 + \[2\] = 1 + \} +\}""" ], ) self.expect_expr("a.size()", result_type=size_type, result_value="2") From e2402615a5a76d46a433dfcc1de10b38a1263c9d Mon Sep 17 00:00:00 2001 From: Ander Date: Sat, 18 Jan 2025 04:45:10 +0100 Subject: [PATCH 88/88] [clang-format] Fix option `BreakBinaryOperations` for operator `>>` (#122282) Fixes #106228. --- clang/lib/Format/ContinuationIndenter.cpp | 1 + clang/unittests/Format/FormatTest.cpp | 33 +++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 554b55fa75c92..c311deaa17bb0 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -148,6 +148,7 @@ static bool startsNextOperand(const FormatToken &Current) { static bool mustBreakBinaryOperation(const FormatToken &Current, const FormatStyle &Style) { return Style.BreakBinaryOperations != FormatStyle::BBO_Never && + Current.CanBreakBefore && (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ? 
startsNextOperand : isAlignableBinaryOperator)(Current); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index d3c97319abb94..f8d13cd0ce250 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -27976,6 +27976,11 @@ TEST_F(FormatTest, BreakBinaryOperations) { " operand1 + operand2 - (operand3 + operand4);", Style); + // Check operator>> special case. + verifyFormat("std::cin >> longOperand_1 >> longOperand_2 >>\n" + " longOperand_3_;", + Style); + Style.BreakBinaryOperations = FormatStyle::BBO_OnePerLine; // Logical operations @@ -28054,6 +28059,13 @@ TEST_F(FormatTest, BreakBinaryOperations) { " operand6->member;", Style); + // Check operator>> special case. + verifyFormat("std::cin >>\n" + " longOperand_1 >>\n" + " longOperand_2 >>\n" + " longOperand_3_;", + Style); + Style.BreakBinaryOperations = FormatStyle::BBO_RespectPrecedence; verifyFormat("result = op1 + op2 * op3 - op4;", Style); @@ -28079,6 +28091,13 @@ TEST_F(FormatTest, BreakBinaryOperations) { " byte_buffer[3] << 24;", Style); + // Check operator>> special case. + verifyFormat("std::cin >>\n" + " longOperand_1 >>\n" + " longOperand_2 >>\n" + " longOperand_3_;", + Style); + Style.BreakBinaryOperations = FormatStyle::BBO_OnePerLine; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; @@ -28153,6 +28172,13 @@ TEST_F(FormatTest, BreakBinaryOperations) { " << 24;", Style); + // Check operator>> special case. + verifyFormat("std::cin\n" + " >> longOperand_1\n" + " >> longOperand_2\n" + " >> longOperand_3_;", + Style); + Style.BreakBinaryOperations = FormatStyle::BBO_RespectPrecedence; verifyFormat("result = op1 + op2 * op3 - op4;", Style); @@ -28177,6 +28203,13 @@ TEST_F(FormatTest, BreakBinaryOperations) { " | byte_buffer[2] << 16\n" " | byte_buffer[3] << 24;", Style); + + // Check operator>> special case. 
+ verifyFormat("std::cin\n" + " >> longOperand_1\n" + " >> longOperand_2\n" + " >> longOperand_3_;", + Style); } TEST_F(FormatTest, RemoveEmptyLinesInUnwrappedLines) {