From c1d01b2fc2932ca3ae6fb81a978f260298dbf343 Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Wed, 8 Jan 2025 16:45:59 +0800 Subject: [PATCH 1/9] [mlir][tosa] Add missing verifier for `tosa.pad` (#120934) This PR adds a missing verifier for `tosa.pad`, ensuring that the padding shape matches [2*rank(shape1)] according to V1.0.0 Specification. Fixes #119840. --- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 10 ++-- .../mlir/Dialect/Tosa/IR/TosaTypesBase.td | 14 ++--- .../Conversion/TosaToTensor/TosaToTensor.cpp | 12 ++--- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 12 +++-- .../Tosa/Transforms/TosaDecomposeConv2D.cpp | 2 +- .../Transforms/TosaDecomposeDepthwise.cpp | 2 +- .../Transforms/TosaDecomposeTransposeConv.cpp | 6 +-- .../TosaToTensor/tosa-to-tensor.mlir | 52 ++++++------------- mlir/test/Dialect/Tosa/canonicalize.mlir | 27 +++++----- mlir/test/Dialect/Tosa/invalid.mlir | 30 +++++++---- mlir/test/Dialect/Tosa/ops.mlir | 8 +-- .../Dialect/Tosa/tosa-decompose-conv2d.mlir | 4 +- .../Tosa/tosa-decompose-depthwise.mlir | 4 +- .../Tosa/tosa-decompose-transpose-conv.mlir | 14 ++--- mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir | 18 +++---- 15 files changed, 100 insertions(+), 115 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index 8ae5d3ab417b6..b4f61c1a8f3a8 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -1552,21 +1552,21 @@ def Tosa_PadOp : Tosa_InferShapedTypeOp<"pad"> { Example: ```mlir - %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> - tosa.pad %arg0, %0 : (tensor<1x2xf32>, tensor<2x2xi32>) -> (tensor<4x9xf32>) + %0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32> + tosa.pad %arg0, %0 : (tensor<1x2xf32>, tensor<4xi32>) -> (tensor<4x9xf32>) ``` Example 2: ```mlir - %0 = arith.constant dense<[[-1, 2], [3, 4]]> : tensor<2x2xi32> - tosa.pad %arg0, %0 : (tensor<1x2xf32>, tensor<2x2xi32>) -> (tensor) + %0 = arith.constant dense<[-1, 2, 3, 4]> : tensor<4xi32> + tosa.pad %arg0, %0 : (tensor<1x2xf32>, tensor<4xi32>) -> (tensor) ``` }]; let arguments = (ins Tosa_RankedTensor:$input1, - Tosa_Int32Or64Tensor:$padding, + TosaTensorRankOf<[Tosa_Int32Or64], [1]>:$padding, Optional:$pad_const, OptionalAttr:$quantization_info ); diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td index a6d3163d4446f..d3cc6e92bac22 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td @@ -65,17 +65,17 @@ def Tosa_Int32Or64 : AnyTypeOf<[Tosa_Int32, // int8 : symmetric per tensor/per channel, signed // int16 : symmetric per tensor, signed //===----------------------------------------------------------------------===// -def Tosa_QuantizedInt : AnyTypeOf<[ Tosa_QuantizedType<"uint8", [8], 0>, - Tosa_QuantizedType<"int4", [4, 0], 1>, - Tosa_QuantizedType<"int8", [8, 0], 1>, - Tosa_QuantizedType<"int16", [16, 0], 1>, - Tosa_QuantizedType<"int32", [32, 0], 1>]>; +def Tosa_QuantizedInt : AnyTypeOf<[Tosa_QuantizedType<"uint8", [8], 0>, + Tosa_QuantizedType<"int4", [4, 0], 1>, + Tosa_QuantizedType<"int8", [8, 0], 1>, + Tosa_QuantizedType<"int16", [16, 0], 1>, + Tosa_QuantizedType<"int32", [32, 0], 1>]>; //===----------------------------------------------------------------------===// // Multi-category types. //===----------------------------------------------------------------------===// def Tosa_AnyNumber : AnyTypeOf<[Tosa_Int, Tosa_QuantizedInt, AnyFloat], - "number">; + "number">; // For weight tensors from tosa::Conv2DOp, tosa::Conv3DOp, // tosa::DepthwiseConv2DOp, tosa::TransposeConv2DOp, tosa::FullyConnectedOp @@ -112,7 +112,7 @@ class TosaTensorRankOf allowedTypes, list ranks> def Tosa_I1Tensor : TosaTensorOf<[I1]>; def Tosa_Int32Tensor : TosaTensorOf<[Tosa_Int32]>; -def Tosa_Int32Or64Tensor :TosaTensorOf<[Tosa_Int32Or64]>; +def Tosa_Int32Or64Tensor : TosaTensorOf<[Tosa_Int32Or64]>; def Tosa_FloatTensor : TosaTensorOf<[AnyFloat]>; diff --git a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp index 6f085cb6ed06d..b5a0da15e780e 100644 --- a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp +++ b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp @@ -338,11 +338,6 @@ class PadConverter : public OpConversionPattern { padOp, "tosa.pad was unable to determine the pad constant value."); } - Value lowIndex = - rewriter.create(loc, rewriter.getIndexAttr(0)); - Value highIndex = - rewriter.create(loc, rewriter.getIndexAttr(1)); - SmallVector lowValues; SmallVector highValues; @@ -350,11 +345,12 @@ class PadConverter : public OpConversionPattern { highValues.reserve(rank); for (int i = 0; i < rank; i++) { - Value inputIndex = rewriter.create(loc, i); + Value lowIndex = rewriter.create(loc, 2 * i); + Value highIndex = rewriter.create(loc, 2 * i + 1); Value lowVal = rewriter.createOrFold( - loc, padding, ValueRange({inputIndex, lowIndex})); + loc, padding, ValueRange({lowIndex})); Value highVal = rewriter.createOrFold( - loc, padding, ValueRange({inputIndex, highIndex})); + loc, padding, ValueRange({highIndex})); lowVal = rewriter.createOrFold( loc, rewriter.getIndexType(), lowVal); diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 631d3c48f2df0..a46402a496839 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -787,7 +787,7 @@ LogicalResult tosa::PadOp::inferReturnTypeComponents( return success(); } - outputShape.resize(paddingShape.getDimSize(0), ShapedType::kDynamic); + outputShape.resize(paddingShape.getDimSize(0) / 2, ShapedType::kDynamic); inferredReturnShapes.push_back(ShapedTypeComponents(outputShape)); return success(); } @@ -823,13 +823,17 @@ LogicalResult tosa::PadOp::inferReturnTypeComponents( LogicalResult tosa::PadOp::verify() { RankedTensorType inputType = getInput1().getType(); RankedTensorType outputType = getOutput().getType(); - TensorType paddingType = getPadding().getType(); + RankedTensorType paddingType = getPadding().getType(); if (inputType.getRank() != outputType.getRank()) return emitOpError() << "expect same input and output tensor rank."; - if (paddingType.hasRank() && paddingType.getRank() != 2) - return emitOpError() << "expect 'padding' tensor rank equal to 2."; + if (!paddingType.isDynamicDim(0) && + paddingType.getDimSize(0) != inputType.getRank() * 2) + return emitOpError() << "expected padding tensor dim 0 to have size " + << inputType.getRank() * 2 + << " (2*rank(shape1)) but got size " + << paddingType.getDimSize(0); return success(); } diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeConv2D.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeConv2D.cpp index 44f64f76e9b02..04a709c596779 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeConv2D.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeConv2D.cpp @@ -81,7 +81,7 @@ struct Conv2DIsFullyConnected : public OpRewritePattern { } } - auto padSizeTy = RankedTensorType::get({4, 2}, rewriter.getI64Type()); + auto padSizeTy = RankedTensorType::get({8}, rewriter.getI64Type()); auto padSize = DenseIntElementsAttr::get(padSizeTy, ArrayRef(pad)); Value padSizeVal = diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeDepthwise.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeDepthwise.cpp index e6fba211dc37a..14f392ab8c45c 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeDepthwise.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeDepthwise.cpp @@ -108,7 +108,7 @@ struct DepthwiseConv2DIsMul : public OpRewritePattern { } } - auto padSizeTy = RankedTensorType::get({5, 2}, rewriter.getI64Type()); + auto padSizeTy = RankedTensorType::get({10}, rewriter.getI64Type()); auto padSize = DenseIntElementsAttr::get(padSizeTy, ArrayRef(pad)); Value padSizeVal = diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp index 0779cdb9667a1..fda39c516077d 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp @@ -139,7 +139,7 @@ class TransposeConvStridedConverter weightPadding[5] = (weightWidth % stride[1]) ? (stride[1] - weightWidth % stride[1]) : 0; DenseElementsAttr weightPaddingAttr = DenseIntElementsAttr::get( - RankedTensorType::get({4, 2}, rewriter.getI32Type()), weightPadding); + RankedTensorType::get({8}, rewriter.getI32Type()), weightPadding); Value weightPaddingVal = CreateOpAndInferShape( rewriter, loc, weightPaddingAttr.getType(), weightPaddingAttr); @@ -202,7 +202,7 @@ class TransposeConvStridedConverter inputPadding[5] += restridedWeightTy.getDimSize(2) - 1; DenseElementsAttr inputPaddingAttr = DenseIntElementsAttr::get( - RankedTensorType::get({4, 2}, rewriter.getI32Type()), inputPadding); + RankedTensorType::get({8}, rewriter.getI32Type()), inputPadding); Value inputPaddingVal = CreateOpAndInferShape( rewriter, loc, inputPaddingAttr.getType(), inputPaddingAttr); @@ -314,7 +314,7 @@ class TransposeConvStridedConverter resultPadding[5] = resultTy.getDimSize(2) - resultPadLeft - sliceSize[2]; DenseElementsAttr resultPaddingAttr = DenseIntElementsAttr::get( - RankedTensorType::get({4, 2}, rewriter.getI32Type()), resultPadding); + RankedTensorType::get({8}, rewriter.getI32Type()), resultPadding); Value resultPaddingVal = CreateOpAndInferShape( rewriter, loc, resultPaddingAttr.getType(), resultPaddingAttr); diff --git a/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir b/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir index 1e62e25176a00..0b9a64494bc0f 100644 --- a/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir +++ b/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir @@ -459,85 +459,65 @@ func.func @slice_dyn(%arg0: tensor) -> (tensor) { // CHECK-LABEL: @pad_float // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]: func.func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) { - %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> - // TODO: Output contains multiple "arith.constant 1 : index". - // CHECK-DAG: [[INDEX1:%.+]] = arith.constant 1 : index - // CHECK-DAG: [[INDEX2:%.+]] = arith.constant 2 : index - // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index - // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index + %0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32> // CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32 - // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { + // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, %{{.*}}] high{{\[}}%{{.*}}, %{{.*}}] { // CHECK: tensor.yield [[CST]] // CHECK: } : tensor<1x2xf32> to tensor<4x9xf32> - %1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xf32>, tensor<2x2xi32>) -> (tensor<4x9xf32>) + %1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xf32>, tensor<4xi32>) -> (tensor<4x9xf32>) return %1 : tensor<4x9xf32> } func.func @pad_int(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) { - %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> + %0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32> // CHECK: [[CST:%.+]] = arith.constant 0 : i32 // CHECK: tensor.pad // CHECK: tensor.yield [[CST]] - %1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xi32>, tensor<2x2xi32>) -> (tensor<4x9xi32>) + %1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xi32>, tensor<4xi32>) -> (tensor<4x9xi32>) return %1 : tensor<4x9xi32> } func.func @pad_quant(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) { - %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> + %0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32> // CHECK: [[CST:%.+]] = arith.constant 42 : i32 // CHECK: tensor.pad // CHECK: tensor.yield [[CST]] - %1 = "tosa.pad"(%arg0, %0) {quantization_info = #tosa.pad_quant} : (tensor<1x2xi32>, tensor<2x2xi32>) -> (tensor<4x9xi32>) + %1 = "tosa.pad"(%arg0, %0) {quantization_info = #tosa.pad_quant} : (tensor<1x2xi32>, tensor<4xi32>) -> (tensor<4x9xi32>) return %1 : tensor<4x9xi32> } // ----- func.func @pad_float_explicit(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) { - %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> - // TODO: Output contains multiple "arith.constant 1 : index". - // CHECK-DAG: [[INDEX1:%.+]] = arith.constant 1 : index - // CHECK-DAG: [[INDEX2:%.+]] = arith.constant 2 : index - // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index - // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index + %0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32> // CHECK-DAG: [[CST:%.+]] = arith.constant 4.200000e+01 : f32 - // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { + // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, %{{.*}}] high{{\[}}%{{.*}}, %{{.*}}] { // CHECK: tensor.yield [[CST]] // CHECK: } : tensor<1x2xf32> to tensor<4x9xf32> %1 = arith.constant dense<42.0> : tensor - %2 = "tosa.pad"(%arg0, %0, %1) : (tensor<1x2xf32>, tensor<2x2xi32>, tensor) -> (tensor<4x9xf32>) + %2 = "tosa.pad"(%arg0, %0, %1) : (tensor<1x2xf32>, tensor<4xi32>, tensor) -> (tensor<4x9xf32>) return %2 : tensor<4x9xf32> } // ----- func.func @pad_dyn_input(%arg0 : tensor) -> (tensor) { - %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> - // TODO: Output contains multiple "arith.constant 1 : index". - // CHECK-DAG: [[INDEX1:%.+]] = arith.constant 1 : index - // CHECK-DAG: [[INDEX2:%.+]] = arith.constant 2 : index - // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index - // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index + %0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32> // CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32 - // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { + // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, %{{.*}}] high{{\[}}%{{.*}}, %{{.*}}] { // CHECK: tensor.yield [[CST]] // CHECK: } : tensor to tensor - %1 = "tosa.pad"(%arg0, %0) : (tensor, tensor<2x2xi32>) -> (tensor) + %1 = "tosa.pad"(%arg0, %0) : (tensor, tensor<4xi32>) -> (tensor) return %1 : tensor } func.func @pad_dyn_padding(%arg0 : tensor<1x2xf32>) -> (tensor) { - %0 = arith.constant dense<[[-1, 2], [3, 4]]> : tensor<2x2xi32> - // TODO: Output contains multiple "arith.constant 1 : index". - // CHECK-DAG: [[INDEX1:%.+]] = arith.constant 1 : index - // CHECK-DAG: [[INDEX2:%.+]] = arith.constant 2 : index - // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index - // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index + %0 = arith.constant dense<[-1, 2, 3, 4]> : tensor<4xi32> // CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32 - // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { + // CHECK: tensor.pad %[[ARG0]] low{{\[}}%{{.*}}, %{{.*}}] high{{\[}}%{{.*}}, %{{.*}}] { // CHECK: tensor.yield [[CST]] // CHECK: } : tensor<1x2xf32> to tensor - %1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xf32>, tensor<2x2xi32>) -> (tensor) + %1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xf32>, tensor<4xi32>) -> (tensor) return %1 : tensor } diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir index 67cd01f62f0bd..063b0b2095df0 100644 --- a/mlir/test/Dialect/Tosa/canonicalize.mlir +++ b/mlir/test/Dialect/Tosa/canonicalize.mlir @@ -210,8 +210,8 @@ func.func @max_pool2d_is_noop(%arg0: tensor<10x1x1x3xf32>) -> tensor<10x1x1x3xf3 // CHECK-LABEL: @pad_noop func.func @pad_noop(%arg0: tensor) -> tensor { // CHECK: return %arg0 - %0 = "tosa.const"() { value = dense<0> : tensor<2x2xi32>} : () -> tensor<2x2xi32> - %1 = tosa.pad %arg0, %0 : (tensor, tensor<2x2xi32>) -> tensor + %0 = "tosa.const"() { value = dense<0> : tensor<4xi32>} : () -> tensor<4xi32> + %1 = tosa.pad %arg0, %0 : (tensor, tensor<4xi32>) -> tensor return %1 : tensor } @@ -221,8 +221,8 @@ func.func @pad_noop(%arg0: tensor) -> tensor { func.func @pad_noop_padding_mismatch_nofold(%arg0: tensor) -> tensor { // CHECK: %[[PAD:.+]] = tosa.pad // CHECK: return %[[PAD]] - %0 = "tosa.const"() { value = dense_resource<__elided__> : tensor<2x2xi32>} : () -> tensor<2x2xi32> - %1 = tosa.pad %arg0, %0 : (tensor, tensor<2x2xi32>) -> tensor + %0 = "tosa.const"() { value = dense_resource<__elided__> : tensor<4xi32>} : () -> tensor<4xi32> + %1 = tosa.pad %arg0, %0 : (tensor, tensor<4xi32>) -> tensor return %1 : tensor } @@ -234,42 +234,39 @@ func.func @pad_noop_type_mismatch_nofold(%arg0: tensor<10xf32>) -> tensor // CHECK: return %[[PAD]] %c0_i32 = arith.constant 0 : i32 - %shape = tensor.from_elements %c0_i32, %c0_i32 : tensor<1x2xi32> + %shape = tensor.from_elements %c0_i32, %c0_i32 : tensor<2xi32> - %0 = tosa.pad %arg0, %shape : (tensor<10xf32>, tensor<1x2xi32>) -> tensor + %0 = tosa.pad %arg0, %shape : (tensor<10xf32>, tensor<2xi32>) -> tensor return %0 : tensor } // ----- // CHECK-LABEL: @pad_determine_val_i32 -func.func @pad_determine_val_i32(%arg0: tensor, %arg1 : tensor<2x2xi32>) -> tensor { +func.func @pad_determine_val_i32(%arg0: tensor, %arg1 : tensor<4xi32>) -> tensor { // CHECK: %[[ZERO:.+]] = "tosa.const"() <{value = dense<0> : tensor} // CHECK: tosa.pad %arg0, %arg1, %[[ZERO]] - %0 = "tosa.const"() { value = dense<[[1, 0], [0, 1]]> : tensor<2x2xi32>} : () -> tensor<2x2xi32> - %1 = tosa.pad %arg0, %arg1 : (tensor, tensor<2x2xi32>) -> tensor + %1 = tosa.pad %arg0, %arg1 : (tensor, tensor<4xi32>) -> tensor return %1 : tensor } // ----- // CHECK-LABEL: @pad_determine_val_f32 -func.func @pad_determine_val_f32(%arg0: tensor, %arg1 : tensor<2x2xi32>) -> tensor { +func.func @pad_determine_val_f32(%arg0: tensor, %arg1 : tensor<4xi32>) -> tensor { // CHECK: %[[ZERO:.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor} // CHECK: tosa.pad %arg0, %arg1, %[[ZERO]] - %0 = "tosa.const"() { value = dense<[[1, 0], [0, 1]]> : tensor<2x2xi32>} : () -> tensor<2x2xi32> - %1 = tosa.pad %arg0, %arg1 : (tensor, tensor<2x2xi32>) -> tensor + %1 = tosa.pad %arg0, %arg1 : (tensor, tensor<4xi32>) -> tensor return %1 : tensor } // ----- // CHECK-LABEL: @pad_determine_val_quant -func.func @pad_determine_val_quant(%arg0: tensor, %arg1 : tensor<2x2xi32>) -> tensor { +func.func @pad_determine_val_quant(%arg0: tensor, %arg1 : tensor<4xi32>) -> tensor { // CHECK: %[[ZERO:.+]] = "tosa.const"() <{value = dense<42> : tensor} // CHECK: tosa.pad %arg0, %arg1, %[[ZERO]] - %0 = "tosa.const"() { value = dense<[[1, 0], [0, 1]]> : tensor<2x2xi32>} : () -> tensor<2x2xi32> - %1 = tosa.pad %arg0, %arg1 {quantization_info = #tosa.pad_quant} : (tensor, tensor<2x2xi32>) -> tensor + %1 = tosa.pad %arg0, %arg1 {quantization_info = #tosa.pad_quant} : (tensor, tensor<4xi32>) -> tensor return %1 : tensor } diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index b796a6343e5ed..9aa2f8eaac899 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -77,48 +77,56 @@ func.func @test_concat_element_type_mismatch(%arg0 : tensor<1x2xf32>, %arg1 : te // ----- -func.func @test_pad_non_const(%arg0: tensor<13x21x3xf32>, %arg1: tensor<3x2xi32>) -> tensor<13x21x3xf32> { +func.func @test_pad_non_const(%arg0: tensor<13x21x3xf32>, %arg1: tensor<6xi32>) -> tensor<13x21x3xf32> { // expected-error@+1 {{'tosa.pad' op padding of pad is not constant}} - %0 = tosa.pad %arg0, %arg1 : (tensor<13x21x3xf32>, tensor<3x2xi32>) -> tensor<13x21x3xf32> + %0 = tosa.pad %arg0, %arg1 : (tensor<13x21x3xf32>, tensor<6xi32>) -> tensor<13x21x3xf32> return %0 : tensor<13x21x3xf32> } // ----- func.func @test_pad_non_const(%arg0: tensor<13x21x3xi8>, %arg1: tensor) -> tensor<13x21x3xi8> { - %0 = "tosa.const"() {value = dense<[[0, 0], [0, 1], [0, 1]]> : tensor<3x2xi32>} : () -> tensor<3x2xi32> + %0 = "tosa.const"() {value = dense<[0, 0, 0, 1, 0, 1]> : tensor<6xi32>} : () -> tensor<6xi32> // expected-error@+1 {{'tosa.pad' op pad_const of pad is not constant}} - %1 = tosa.pad %arg0, %0, %arg1 : (tensor<13x21x3xi8>, tensor<3x2xi32>, tensor) -> tensor<13x21x3xi8> + %1 = tosa.pad %arg0, %0, %arg1 : (tensor<13x21x3xi8>, tensor<6xi32>, tensor) -> tensor<13x21x3xi8> return %1 : tensor<13x21x3xi8> } // ----- -func.func @test_pad_io_rank_mismatch(%arg0: tensor<13x21xf32>, %arg1: tensor<2x2xi32>) { +func.func @test_pad_io_rank_mismatch(%arg0: tensor<13x21xf32>, %arg1: tensor<4xi32>) { // expected-error@+1 {{'tosa.pad' op expect same input and output tensor rank.}} - %1 = tosa.pad %arg0, %arg1 : (tensor<13x21xf32>, tensor<2x2xi32>) -> tensor<13x21x3xf32> + %1 = tosa.pad %arg0, %arg1 : (tensor<13x21xf32>, tensor<4xi32>) -> tensor<13x21x3xf32> return } // ----- -func.func @test_pad_invalid_padding_rank(%arg0: tensor<13x21xf32>, %arg1: tensor<2xi32>) { - // expected-error@+1 {{'tosa.pad' op expect 'padding' tensor rank equal to 2.}} - %1 = tosa.pad %arg0, %arg1 : (tensor<13x21xf32>, tensor<2xi32>) -> tensor<13x21xf32> +func.func @test_pad_invalid_padding_rank(%arg0: tensor<13x21xf32>, %arg1: tensor<2x2xi32>) { + // expected-error@+1 {{'tosa.pad' op operand #1 must be 1D tensor of 32-bit signless integer or 64-bit signless integer values, but got 'tensor<2x2xi32>'}} + %1 = tosa.pad %arg0, %arg1 : (tensor<13x21xf32>, tensor<2x2xi32>) -> tensor<13x21xf32> return } // ----- -func.func @test_pad_invalid_padConst_rank(%arg0: tensor<13x21xf32>, %arg1: tensor<2x2xi32>) { +func.func @test_pad_invalid_padConst_rank(%arg0: tensor<13x21xf32>, %arg1: tensor<4xi32>) { %0 = "tosa.const"() {value = dense<3.14> : tensor<1xf32>} : () -> tensor<1xf32> // expected-error@+1 {{'tosa.pad' op operand #2 must be 0D tensor of number values, but got 'tensor<1xf32>'}} - %1 = tosa.pad %arg0, %arg1, %0 : (tensor<13x21xf32>, tensor<2x2xi32>, tensor<1xf32>) -> tensor<13x21xf32> + %1 = tosa.pad %arg0, %arg1, %0 : (tensor<13x21xf32>, tensor<4xi32>, tensor<1xf32>) -> tensor<13x21xf32> return } // ----- +func.func @test_pad_padding_shape_mismatch(%arg0: tensor<13x21x3xf32>, %arg1: tensor<4xi32>) -> tensor<13x21x3xf32> { + // expected-error@+1 {{'tosa.pad' op expected padding tensor dim 0 to have size 6 (2*rank(shape1)) but got size 4}} + %0 = tosa.pad %arg0, %arg1 : (tensor<13x21x3xf32>, tensor<4xi32>) -> tensor<13x21x3xf32> + return %0 : tensor<13x21x3xf32> +} + +// ----- + func.func @test_transpose_non_const(%arg0: tensor<13x21x3xf32>, %arg1: tensor<3xi32>) -> tensor<3x13x21xf32> { // expected-error@+1 {{'tosa.transpose' op perms of transpose is not constant}} %0 = tosa.transpose %arg0, %arg1 : (tensor<13x21x3xf32>, tensor<3xi32>) -> tensor<3x13x21xf32> diff --git a/mlir/test/Dialect/Tosa/ops.mlir b/mlir/test/Dialect/Tosa/ops.mlir index 88fa94ae90db6..a8c86960a6c86 100644 --- a/mlir/test/Dialect/Tosa/ops.mlir +++ b/mlir/test/Dialect/Tosa/ops.mlir @@ -525,16 +525,16 @@ func.func @test_concat(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x21x3xf32>) - // ----- // CHECK-LABEL: pad -func.func @test_pad(%arg0: tensor<13x21x3xf32>, %arg1: tensor<3x2xi32>) -> tensor<13x21x3xf32> { - %0 = tosa.pad %arg0, %arg1 : (tensor<13x21x3xf32>, tensor<3x2xi32>) -> tensor<13x21x3xf32> +func.func @test_pad(%arg0: tensor<13x21x3xf32>, %arg1: tensor<6xi32>) -> tensor<13x21x3xf32> { + %0 = tosa.pad %arg0, %arg1 : (tensor<13x21x3xf32>, tensor<6xi32>) -> tensor<13x21x3xf32> return %0 : tensor<13x21x3xf32> } // ----- // CHECK-LABEL: pad_explicit_value -func.func @test_pad_explicit_value(%arg0: tensor<13x21x3xf32>, %arg1: tensor<3x2xi32>) -> tensor<13x21x3xf32> { +func.func @test_pad_explicit_value(%arg0: tensor<13x21x3xf32>, %arg1: tensor<6xi32>) -> tensor<13x21x3xf32> { %0 = "tosa.const"() {value = dense<3.14> : tensor} : () -> tensor - %1 = tosa.pad %arg0, %arg1, %0 : (tensor<13x21x3xf32>, tensor<3x2xi32>, tensor) -> tensor<13x21x3xf32> + %1 = tosa.pad %arg0, %arg1, %0 : (tensor<13x21x3xf32>, tensor<6xi32>, tensor) -> tensor<13x21x3xf32> return %1 : tensor<13x21x3xf32> } diff --git a/mlir/test/Dialect/Tosa/tosa-decompose-conv2d.mlir b/mlir/test/Dialect/Tosa/tosa-decompose-conv2d.mlir index d876ccfb3b911..fc9c947e203c4 100644 --- a/mlir/test/Dialect/Tosa/tosa-decompose-conv2d.mlir +++ b/mlir/test/Dialect/Tosa/tosa-decompose-conv2d.mlir @@ -58,9 +58,9 @@ func.func @conv_with_dynamic_dim(%arg0: tensor, %arg1: tensor<384 // CHECK-LABEL: @conv2d_as_fully_connected_padded func.func @conv2d_as_fully_connected_padded(%arg0: tensor<4x10x10x2xi8>, %arg1: tensor<3x1x1x2xi8>, %arg2: tensor<3xi32>) -> tensor<4x12x12x3xi32> { - // CHECK-DAG: %[[PAD_SHAPE:.+]] = "tosa.const"() <{value = dense<{{\[\[}}0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi64>} + // CHECK-DAG: %[[PAD_SHAPE:.+]] = "tosa.const"() <{value = dense<{{\[}}0, 0, 1, 1, 1, 1, 0, 0]> : tensor<8xi64>} // CHECK-DAG: %[[PAD_VAL:.+]] = "tosa.const"() <{value = dense<42> : tensor} - // CHECK-DAG: %[[PAD:.+]] = tosa.pad %arg0, %[[PAD_SHAPE]], %[[PAD_VAL]] : (tensor<4x10x10x2xi8>, tensor<4x2xi64>, tensor) -> tensor<4x12x12x2xi8> + // CHECK-DAG: %[[PAD:.+]] = tosa.pad %arg0, %[[PAD_SHAPE]], %[[PAD_VAL]] : (tensor<4x10x10x2xi8>, tensor<8xi64>, tensor) -> tensor<4x12x12x2xi8> // CHECK-DAG: %[[RESHAPE_INPUT:.+]] = tosa.reshape %[[PAD]] {new_shape = array} // CHECK-DAG: %[[RESHAPE_FILTER:.+]] = tosa.reshape %arg1 {new_shape = array} // CHECK-DAG: %[[FULLY:.+]] = tosa.fully_connected %[[RESHAPE_INPUT]], %[[RESHAPE_FILTER]], %arg2 {quantization_info = #tosa.conv_quant} diff --git a/mlir/test/Dialect/Tosa/tosa-decompose-depthwise.mlir b/mlir/test/Dialect/Tosa/tosa-decompose-depthwise.mlir index 2224bf3f57b25..0df299080d851 100644 --- a/mlir/test/Dialect/Tosa/tosa-decompose-depthwise.mlir +++ b/mlir/test/Dialect/Tosa/tosa-decompose-depthwise.mlir @@ -46,10 +46,10 @@ func.func @depthwise_conv2d_as_mul_q(%arg0: tensor<4x10x10x2xi8>, %arg1: tensor< // CHECK-LABEL: @depthwise_conv2d_as_mul_padded func.func @depthwise_conv2d_as_mul_padded(%arg0: tensor<4x10x10x2xf32>, %arg1: tensor<1x1x2x3xf32>, %arg2: tensor<6xf32>) -> tensor<4x12x12x6xf32> { - // CHECK-DAG: %[[pad:.+]] = "tosa.const"() <{value = dense<{{\[\[}}0, 0], [1, 1], [1, 1], [0, 0], [0, 0]]> : tensor<5x2xi64>} + // CHECK-DAG: %[[pad:.+]] = "tosa.const"() <{value = dense<{{\[}}0, 0, 1, 1, 1, 1, 0, 0, 0, 0]> : tensor<10xi64>} // CHECK-DAG: %[[zero:.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor} // CHECK: %[[reIn:.+]] = tosa.reshape %arg0 {new_shape = array} - // CHECK: %[[padded:.+]] = tosa.pad %[[reIn]], %[[pad]], %[[zero]] : (tensor<4x10x10x2x1xf32>, tensor<5x2xi64>, tensor) -> tensor<4x12x12x2x1xf32> + // CHECK: %[[padded:.+]] = tosa.pad %[[reIn]], %[[pad]], %[[zero]] : (tensor<4x10x10x2x1xf32>, tensor<10xi64>, tensor) -> tensor<4x12x12x2x1xf32> // CHECK: %[[reArg1:.+]] = tosa.reshape %arg1 {new_shape = array} // CHECK: %[[mul:.+]] = tosa.mul %3, %[[reArg1]] {shift = 0 : i8} // CHECK: %[[reOut:.+]] = tosa.reshape %[[mul]] {new_shape = array} diff --git a/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir b/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir index 1f2bb3fb9a365..893ec4a7de65d 100644 --- a/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir +++ b/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir @@ -44,7 +44,7 @@ func.func @transpose_conv2d_quantized_padded(%arg0: tensor<2x16x14x3xi8>, %arg1: // CHECK-LABEL: @transpose_conv2d_strided func.func @transpose_conv2d_strided(%arg0: tensor<2x17x15x3xf32>, %arg1: tensor<5x3x5x3xf32>, %arg2: tensor<5xf32>) -> tensor<2x?x?x5xf32> { // Manipulate the weight matrix to handle striding. - // CHECK-DAG: %[[PADV:.+]] = "tosa.const"() <{value = dense<{{\[\[}}0, 0], [0, 1], [0, 1], [0, 0]]> : tensor<4x2xi32>} + // CHECK-DAG: %[[PADV:.+]] = "tosa.const"() <{value = dense<{{\[}}0, 0, 0, 1, 0, 1, 0, 0]> : tensor<8xi32>} // CHECK-DAG: %[[TRANSV:.+]] = "tosa.const"() <{value = dense<[2, 4, 0, 1, 3, 5]> : tensor<6xi32>} // CHECK-DAG: %[[PADW:.+]] = tosa.pad %arg1, %[[PADV]] // CHECK-DAG: %[[RESW1:.+]] = tosa.reshape %[[PADW]] {new_shape = array} @@ -54,7 +54,7 @@ func.func @transpose_conv2d_strided(%arg0: tensor<2x17x15x3xf32>, %arg1: tensor< // CHECK-DAG: %[[NEWWEIGHT:.+]] = tosa.reverse %[[REV1]] {axis = 2 : i32} // Pad out the input matrix to handle the transpose conv. - // CHECK-DAG: %[[PAD:.+]] = "tosa.const"() <{value = dense<{{\[\[}}0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi32>} + // CHECK-DAG: %[[PAD:.+]] = "tosa.const"() <{value = dense<{{\[}}0, 0, 1, 1, 1, 1, 0, 0]> : tensor<8xi32>} // CHECK-DAG: %[[TRANS2:.+]] = "tosa.const"() <{value = dense<[0, 1, 3, 2, 4, 5]> : tensor<6xi32>} // CHECK-DAG: %[[NEWINPUT:.+]] = tosa.pad %arg0, %[[PAD]] @@ -77,7 +77,7 @@ func.func @transpose_conv2d_strided(%arg0: tensor<2x17x15x3xf32>, %arg1: tensor< // CHECK-LABEL: @transpose_conv2d_strided_quantized func.func @transpose_conv2d_strided_quantized(%arg0: tensor<2x17x15x3xi8>, %arg1: tensor<5x3x5x3xi8>, %arg2: tensor<5xi32>) -> (tensor<2x35x47x5xi32>) { // Manipulate the weight matrix to handle striding. - // CHECK-DAG: %[[PADV:.+]] = "tosa.const"() <{value = dense<{{\[\[}}0, 0], [0, 1], [0, 1], [0, 0]]> : tensor<4x2xi32>} + // CHECK-DAG: %[[PADV:.+]] = "tosa.const"() <{value = dense<{{\[}}0, 0, 0, 1, 0, 1, 0, 0]> : tensor<8xi32>} // CHECK-DAG: %[[TRANSV:.+]] = "tosa.const"() <{value = dense<[2, 4, 0, 1, 3, 5]> : tensor<6xi32>} // CHECK-DAG: %[[PADW:.+]] = tosa.pad %arg1, %[[PADV]] {quantization_info = #tosa.pad_quant} // CHECK-DAG: %[[RESW1:.+]] = tosa.reshape %[[PADW]] {new_shape = array} @@ -87,7 +87,7 @@ func.func @transpose_conv2d_strided_quantized(%arg0: tensor<2x17x15x3xi8>, %arg1 // CHECK-DAG: %[[NEWWEIGHT:.+]] = tosa.reverse %[[REV1]] {axis = 2 : i32} // Pad out the input matrix to handle the transpose conv. - // CHECK-DAG: %[[PAD:.+]] = "tosa.const"() <{value = dense<{{\[\[}}0, 0], [1, 1], [1, 1], [0, 0]]> : tensor<4x2xi32>} + // CHECK-DAG: %[[PAD:.+]] = "tosa.const"() <{value = dense<{{\[}}0, 0, 1, 1, 1, 1, 0, 0]> : tensor<8xi32>} // CHECK-DAG: %[[TRANS2:.+]] = "tosa.const"() <{value = dense<[0, 1, 3, 2, 4, 5]> : tensor<6xi32>} // CHECK-DAG: %[[NEWINPUT:.+]] = tosa.pad %arg0, %[[PAD]] {quantization_info = #tosa.pad_quant} @@ -108,12 +108,12 @@ func.func @transpose_conv2d_strided_quantized(%arg0: tensor<2x17x15x3xi8>, %arg1 // CHECK-LABEL: @transpose_conv2d_strided_overpad func.func @transpose_conv2d_strided_overpad(%arg0 : tensor<1x16x1x1xi8>, %arg1 : tensor<1x2x1x1xi8>, %arg2 : tensor<1xi32>) -> (tensor<1x19x2x1xi32>) { - // CHECK-DAG: %[[WEIGHT_PAD:.+]] = "tosa.const"() <{value = dense<{{\[}}[0, 0], [0, 0], [0, 1], [0, 0]]> : tensor<4x2xi32> + // CHECK-DAG: %[[WEIGHT_PAD:.+]] = "tosa.const"() <{value = dense<{{\[}}0, 0, 0, 0, 0, 1, 0, 0]> : tensor<8xi32> // CHECK-DAG: %[[WEIGHT_PERMS:.+]] = "tosa.const"() <{value = dense<[2, 4, 0, 1, 3, 5]> : tensor<6xi32>} - // CHECK-DAG: %[[INPUT_PAD:.+]] = "tosa.const"() <{value = dense<{{\[}}[0, 0], [1, 1], [0, 0], [0, 0]]> : tensor<4x2xi32>} + // CHECK-DAG: %[[INPUT_PAD:.+]] = "tosa.const"() <{value = dense<{{\[}}0, 0, 1, 1, 0, 0, 0, 0]> : tensor<8xi32>} // CHECK-DAG: %[[ZERO:.+]] = "tosa.const"() <{value = dense<0> : tensor<2xi32>} // CHECK-DAG: %[[RESULT_PERMS:.+]] = "tosa.const"() <{value = dense<[0, 1, 3, 2, 4, 5]> : tensor<6xi32>} - // CHECK-DAG: %[[RESULT_PAD:.+]] = "tosa.const"() <{value = dense<{{\[}}[0, 0], [2, 0], [0, 0], [0, 0]]> : tensor<4x2xi32>} + // CHECK-DAG: %[[RESULT_PAD:.+]] = "tosa.const"() <{value = dense<{{\[}}0, 0, 2, 0, 0, 0, 0, 0]> : tensor<8xi32>} // CHECK: %[[PAD_WEIGHT:.+]] = tosa.pad %arg1, %[[WEIGHT_PAD]] {quantization_info = #tosa.pad_quant} // CHECK: %[[RESHAPE_WEIGHT_0:.+]] = tosa.reshape %[[PAD_WEIGHT]] {new_shape = array} // CHECK: %[[TRANSPOSE_WEIGHT:.+]] = tosa.transpose %[[RESHAPE_WEIGHT_0]], %[[WEIGHT_PERMS]] diff --git a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir index d46de740800e9..7daf46e375e12 100644 --- a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir +++ b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir @@ -495,9 +495,9 @@ func.func @test_concat_axis_1(%arg0 : tensor<2x1xf32>, %arg1 : tensor<2x2xf32>) // ----- // CHECK-LABEL: @test_padding_no_const -func.func @test_padding_no_const(%arg0 : tensor<1x2xf32>, %arg1 : tensor<2x2xi32>) -> () { - // CHECK: tosa.pad %arg0, %arg1 : (tensor<1x2xf32>, tensor<2x2xi32>) -> tensor - %0 = tosa.pad %arg0, %arg1 : (tensor<1x2xf32>, tensor<2x2xi32>) -> tensor +func.func @test_padding_no_const(%arg0 : tensor<1x2xf32>, %arg1 : tensor<4xi32>) -> () { + // CHECK: tosa.pad %arg0, %arg1 : (tensor<1x2xf32>, tensor<4xi32>) -> tensor + %0 = tosa.pad %arg0, %arg1 : (tensor<1x2xf32>, tensor<4xi32>) -> tensor return } @@ -505,9 +505,9 @@ func.func @test_padding_no_const(%arg0 : tensor<1x2xf32>, %arg1 : tensor<2x2xi32 // CHECK-LABEL:@test_padding_dynamic_input func.func @test_padding_dynamic_input(%arg0 : tensor<1x?xf32>) -> () { - %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> - // CHECK: tosa.pad %arg0, %cst : (tensor<1x?xf32>, tensor<2x2xi32>) -> tensor<4x?xf32> - %1 = tosa.pad %arg0, %0 : (tensor<1x?xf32>, tensor<2x2xi32>) -> tensor + %0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32> + // CHECK: tosa.pad %arg0, %cst : (tensor<1x?xf32>, tensor<4xi32>) -> tensor<4x?xf32> + %1 = tosa.pad %arg0, %0 : (tensor<1x?xf32>, tensor<4xi32>) -> tensor return } @@ -515,9 +515,9 @@ func.func @test_padding_dynamic_input(%arg0 : tensor<1x?xf32>) -> () { // CHECK-LABEL: @test_padding_simple func.func @test_padding_simple(%arg0 : tensor<1x2xf32>) -> () { - %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> - // CHECK: tosa.pad %arg0, %cst : (tensor<1x2xf32>, tensor<2x2xi32>) -> tensor<4x9xf32> - %1 = tosa.pad %arg0, %0 : (tensor<1x2xf32>, tensor<2x2xi32>) -> tensor + %0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32> + // CHECK: tosa.pad %arg0, %cst : (tensor<1x2xf32>, tensor<4xi32>) -> tensor<4x9xf32> + %1 = tosa.pad %arg0, %0 : (tensor<1x2xf32>, tensor<4xi32>) -> tensor return } From 366e62a0cb5d1c94d3b281f094755c4dd4c76df9 Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Wed, 8 Jan 2025 00:49:29 -0800 Subject: [PATCH 2/9] [X86] Combine `uitofp to ` (#121809) Closes #121793 --- .../SelectionDAG/LegalizeVectorOps.cpp | 25 +++ llvm/test/CodeGen/X86/uint_to_half.ll | 198 ++++++++++++++++++ 2 files changed, 223 insertions(+) create mode 100644 llvm/test/CodeGen/X86/uint_to_half.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index e8404a13009a7..89a00c5a4f043 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1777,6 +1777,31 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, assert((BW == 64 || BW == 32) && "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); + // If STRICT_/FMUL is not supported by the target (in case of f16) replace the + // UINT_TO_FP with a larger float and round to the smaller type + if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) || + (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) { + EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64; + SDValue UIToFP; + SDValue Result; + SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true); + EVT FloatVecVT = SrcVT.changeVectorElementType(FPVT); + if (IsStrict) { + UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other}, + {Node->getOperand(0), Src}); + Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other}, + {Node->getOperand(0), UIToFP, TargetZero}); + Results.push_back(Result); + Results.push_back(Result.getValue(1)); + } else { + UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src); + Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero); + Results.push_back(Result); + } + + return; + } + SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT); // Constants to clear the upper part of the word. diff --git a/llvm/test/CodeGen/X86/uint_to_half.ll b/llvm/test/CodeGen/X86/uint_to_half.ll new file mode 100644 index 0000000000000..b62a07eec1ce6 --- /dev/null +++ b/llvm/test/CodeGen/X86/uint_to_half.ll @@ -0,0 +1,198 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx,+f16c | FileCheck %s -check-prefixes=AVX1 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+f16c | FileCheck %s -check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s -check-prefixes=AVX512 + +define <8 x half> @test_uitofp_v8i32_v8f16(<8 x i32> %a) { +; AVX1-LABEL: test_uitofp_v8i32_v8f16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 +; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_uitofp_v8i32_v8f16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] +; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_uitofp_v8i32_v8f16: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtudq2ps %ymm0, %ymm0 +; AVX512-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %vec = uitofp <8 x i32> %a to <8 x half> + ret <8 x half> %vec +} + +define <8 x half> @test_strict_uitofp_v8i32_v8f16(<8 x i32> %a) { +; AVX1-LABEL: test_strict_uitofp_v8i32_v8f16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 +; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_strict_uitofp_v8i32_v8f16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] +; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_strict_uitofp_v8i32_v8f16: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtudq2ps %ymm0, %ymm0 +; AVX512-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %vec = tail call <8 x half> @llvm.experimental.constrained.uitofp.f16.i32(<8 x i32> %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <8 x half> %vec +} + +define <16 x half> @test_uitofp_v16i32_v16f16(<16 x i32> %a) { +; AVX1-LABEL: test_uitofp_v16i32_v16f16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2 +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4] +; AVX1-NEXT: vmulps %ymm3, %ymm2, %ymm2 +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm4 = [65535,65535,65535,65535,65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; AVX1-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX1-NEXT: vpsrld $16, %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; AVX1-NEXT: vpsrld $16, %xmm5, %xmm5 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2 +; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2 +; AVX1-NEXT: vmulps %ymm3, %ymm2, %ymm2 +; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1 +; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 +; AVX1-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; AVX1-NEXT: vcvtps2ph $4, %ymm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_uitofp_v16i32_v16f16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] +; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm4[1],ymm0[2],ymm4[3],ymm0[4],ymm4[5],ymm0[6],ymm4[7],ymm0[8],ymm4[9],ymm0[10],ymm4[11],ymm0[12],ymm4[13],ymm0[14],ymm4[15] +; AVX2-NEXT: vbroadcastss {{.*#+}} ymm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] +; AVX2-NEXT: vsubps %ymm5, %ymm0, %ymm0 +; AVX2-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; AVX2-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15] +; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm4[1],ymm1[2],ymm4[3],ymm1[4],ymm4[5],ymm1[6],ymm4[7],ymm1[8],ymm4[9],ymm1[10],ymm4[11],ymm1[12],ymm4[13],ymm1[14],ymm4[15] +; AVX2-NEXT: vsubps %ymm5, %ymm1, %ymm1 +; AVX2-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vcvtps2ph $4, %ymm1, %xmm1 +; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_uitofp_v16i32_v16f16: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512-NEXT: vcvtps2ph $4, %zmm0, %ymm0 +; AVX512-NEXT: retq + %vec = uitofp <16 x i32> %a to <16 x half> + ret <16 x half> %vec +} + +define <16 x half> @test_strict_uitofp_v16i32_v16f16(<16 x i32> %a) { +; AVX1-LABEL: test_strict_uitofp_v16i32_v16f16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2 +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4] +; AVX1-NEXT: vmulps %ymm3, %ymm2, %ymm2 +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm4 = [65535,65535,65535,65535,65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; AVX1-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX1-NEXT: vpsrld $16, %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; AVX1-NEXT: vpsrld $16, %xmm5, %xmm5 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2 +; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2 +; AVX1-NEXT: vmulps %ymm3, %ymm2, %ymm2 +; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1 +; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 +; AVX1-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; AVX1-NEXT: vcvtps2ph $4, %ymm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_strict_uitofp_v16i32_v16f16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] +; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm4[1],ymm0[2],ymm4[3],ymm0[4],ymm4[5],ymm0[6],ymm4[7],ymm0[8],ymm4[9],ymm0[10],ymm4[11],ymm0[12],ymm4[13],ymm0[14],ymm4[15] +; AVX2-NEXT: vbroadcastss {{.*#+}} ymm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] +; AVX2-NEXT: vsubps %ymm5, %ymm0, %ymm0 +; AVX2-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; AVX2-NEXT: vcvtps2ph $4, %ymm0, %xmm0 +; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15] +; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm4[1],ymm1[2],ymm4[3],ymm1[4],ymm4[5],ymm1[6],ymm4[7],ymm1[8],ymm4[9],ymm1[10],ymm4[11],ymm1[12],ymm4[13],ymm1[14],ymm4[15] +; AVX2-NEXT: vsubps %ymm5, %ymm1, %ymm1 +; AVX2-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vcvtps2ph $4, %ymm1, %xmm1 +; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_strict_uitofp_v16i32_v16f16: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512-NEXT: vcvtps2ph $4, %zmm0, %ymm0 +; AVX512-NEXT: retq + %vec = tail call <16 x half> @llvm.experimental.constrained.uitofp.f16.i32(<16 x i32> %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <16 x half> %vec +} From e5341784dbcc9e166827233a66fb54645204a43e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 8 Jan 2025 09:50:04 +0100 Subject: [PATCH 3/9] [LLVM] Update inlining maintainers (#120579) Update maintainers for inlining, and add section for NewPM/CGSCC. --- llvm/Maintainers.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index 8d86ac45f3d05..2ccf30b8139aa 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -45,8 +45,12 @@ Matthew.Arsenault@amd.com, arsenm2@gmail.com (email), [arsenm](https://github.co #### Inlining -Chandler Carruth \ -chandlerc@gmail.com, chandlerc@google.com (email), [chandlerc](https://github.com/chandlerc) (GitHub) +Arthur Eubanks \ +aeubanks@google.com (email), [aeubanks](https://github.com/aeubanks) (GitHub) \ +Mircea Trofin (esp. ML inliner) \ +mtrofin@google.com (email), [mtrofin](https://github.com/mtrofin) (GitHub) \ +Kazu Hirata (esp. module inliner and inline order) \ +kazu@google.com (email), [kazutakahirata](https://github.com/kazutakahirata) (GitHub) #### InstCombine, InstSimplify, ValueTracking, ConstantFold @@ -65,6 +69,11 @@ mail@justinbogner.com (email), [bogner](https://github.com/bogner) (GitHub) Diego Novillo \ dnovillo@google.com (email), [dnovillo](https://github.com/dnovillo) (GitHub) +#### New pass manager, CGSCC, LazyCallGraph + +Arthur Eubanks \ +aeubanks@google.com (email), [aeubanks](https://github.com/aeubanks) (GitHub) + #### LoopStrengthReduce Quentin Colombet \ @@ -462,7 +471,7 @@ sabre@nondot.org (email), [lattner](https://github.com/lattner) (GitHub), clattn Paul C. Anagnostopoulos (paul@windfall.com, [Paul-C-Anagnostopoulos](https://github.com/Paul-C-Anagnostopoulos)) -- TableGen \ Justin Bogner (mail@justinbogner.com, [bogner](https://github.com/bogner)) -- SelectionDAG \ -Chandler Carruth (chandlerc@gmail.com, chandlerc@google.com, [chandlerc](https://github.com/chandlerc)) -- ADT, Support \ +Chandler Carruth (chandlerc@gmail.com, chandlerc@google.com, [chandlerc](https://github.com/chandlerc)) -- ADT, Support, Inlining \ Peter Collingbourne (peter@pcc.me.uk, [pcc](https://github.com/pcc)) -- LTO \ Evan Cheng (evan.cheng@apple.com) -- Parts of code generator not covered by someone else \ Jake Ehrlich (jakehehrlich@google.com, [jakehehrlich](https://github.com/jakehehrlich)) -- llvm-objcopy and ObjCopy library \ From 70ab81bc749d0ad67362e612dbb6429ed00a47ec Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Wed, 8 Jan 2025 08:54:48 +0000 Subject: [PATCH 4/9] [AArch64] Ensure APAS instruction passes register parameter (#121928) In PR #112341, the `APAS` instruction was added as part of the Armv9.6-A specification, but it didn't take the Xt register parameter. This change fixes this. --- llvm/lib/Target/AArch64/AArch64InstrFormats.td | 2 ++ llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s | 10 +++++++++- .../MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt | 12 ++++++++++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 47c4c6c39565f..b6fabdb7db59d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1804,7 +1804,9 @@ class TMSystemException op1, string asm, list pattern> } class APASI : SimpleSystemI<0, (ins GPR64:$Xt), "apas", "\t$Xt">, Sched<[]> { + bits<5> Xt; let Inst{20-5} = 0b0111001110000000; + let Inst{4-0} = Xt; let DecoderNamespace = "APAS"; } diff --git a/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s b/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s index baf05f10b9a1e..093101b6cd812 100644 --- a/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s +++ b/llvm/test/MC/AArch64/armv9.6a-rme-gpc3.s @@ -2,10 +2,18 @@ // RUN: llvm-mc -triple aarch64 -show-encoding %s | FileCheck %s .func: apas x0 + apas x1 + apas x2 + apas x17 + apas x30 mrs x3, GPCBW_EL3 msr GPCBW_EL3, x4 # CHECK: .func: -# CHECK-NEXT: apas x0 // encoding: [0x1f,0x70,0x0e,0xd5] +# CHECK-NEXT: apas x0 // encoding: [0x00,0x70,0x0e,0xd5] +# CHECK-NEXT: apas x1 // encoding: [0x01,0x70,0x0e,0xd5] +# CHECK-NEXT: apas x2 // encoding: [0x02,0x70,0x0e,0xd5] +# CHECK-NEXT: apas x17 // encoding: [0x11,0x70,0x0e,0xd5] +# CHECK-NEXT: apas x30 // encoding: [0x1e,0x70,0x0e,0xd5] # CHECK-NEXT: mrs x3, GPCBW_EL3 // encoding: [0xa3,0x21,0x3e,0xd5] # CHECK-NEXT: msr GPCBW_EL3, x4 // encoding: [0xa4,0x21,0x1e,0xd5] diff --git a/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt b/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt index c5d074bf0394f..d198771c341b9 100644 --- a/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt +++ b/llvm/test/MC/Disassembler/AArch64/armv9.6a-rme-gpc3.txt @@ -1,10 +1,18 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mc -triple aarch64 -disassemble %s | FileCheck %s -[0x1f,0x70,0x0e,0xd5] +[0x00,0x70,0x0e,0xd5] +[0x01,0x70,0x0e,0xd5] +[0x02,0x70,0x0e,0xd5] +[0x11,0x70,0x0e,0xd5] +[0x1e,0x70,0x0e,0xd5] [0xa3,0x21,0x3e,0xd5] [0xa4,0x21,0x1e,0xd5] -# CHECK: sys #6, c7, c0, #0 +# CHECK: sys #6, c7, c0, #0, x0 +# CHECK-NEXT: sys #6, c7, c0, #0, x1 +# CHECK-NEXT: sys #6, c7, c0, #0, x2 +# CHECK-NEXT: sys #6, c7, c0, #0, x17 +# CHECK-NEXT: sys #6, c7, c0, #0, x30 # CHECK-NEXT: mrs x3, GPCBW_EL3 # CHECK-NEXT: msr GPCBW_EL3, x4 From b037bceef6a40c5c00c1f67cc5a334e2c4e5e041 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 8 Jan 2025 09:56:56 +0100 Subject: [PATCH 5/9] Add LLVM_GSL_POINTER to llvm::function_ref. (#120699) This can enable clang to detect dangling assignment issues, see #120698. --- llvm/include/llvm/ADT/STLFunctionalExtras.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/STLFunctionalExtras.h b/llvm/include/llvm/ADT/STLFunctionalExtras.h index 3b9d40959d714..a4d50dc3648be 100644 --- a/llvm/include/llvm/ADT/STLFunctionalExtras.h +++ b/llvm/include/llvm/ADT/STLFunctionalExtras.h @@ -36,8 +36,8 @@ namespace llvm { /// a function_ref. template class function_ref; -template -class function_ref { +template +class LLVM_GSL_POINTER function_ref { Ret (*callback)(intptr_t callable, Params ...params) = nullptr; intptr_t callable; From 32bc029be6265838833623fdd88cc665d5658dc7 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 8 Jan 2025 08:59:15 +0000 Subject: [PATCH 6/9] [AArch64] Fix signed comparison warning. NFC --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 0640efde79d01..25b6731cb313a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -4748,7 +4748,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost( LT.second.getVectorNumElements() / 2) { if (Index == 0) return 0; - if (Index == LT.second.getVectorNumElements() / 2) + if (Index == (int)LT.second.getVectorNumElements() / 2) return 1; } Kind = TTI::SK_PermuteSingleSrc; From 44e8ee73591bad22ae19748be825c4b66d7b3dde Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Wed, 8 Jan 2025 10:56:03 +0100 Subject: [PATCH 7/9] [flang][doc] refine zero initialization extension documentation (#121956) Following-up on [comments ](https://github.com/llvm/llvm-project/issues/62432#issuecomment-2555316522) in the issue that motivated this extension. --- flang/docs/Extensions.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 2d1c967a6068d..907f01204a387 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -160,7 +160,11 @@ end * `<>` as synonym for `.NE.` and `/=` * `$` and `@` as legal characters in names * Initialization in type declaration statements using `/values/` -* Saved variables without explicit or default initializers are zero initialized. +* Saved variables without explicit or default initializers are zero initialized, + except for scalar variables from the main program that are not explicitly + initialized or marked with an explicit SAVE attribute (these variables may be + placed on the stack by flang and not zero initialized). It is not advised to + rely on this extension in new code. * In a saved entity of a type with a default initializer, components without default values are zero initialized. * Kind specification with `*`, e.g. `REAL*4` From 303249c4490a7777a744d9afd449b64ff1132a42 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Wed, 8 Jan 2025 10:05:21 +0000 Subject: [PATCH 8/9] [flang][StackArrays] track pointers through fir.convert (#121919) This does add a little computational complexity because now every freemem operation has to be tested for every allocation. This could be improved with some more memoisation but I think it is easier to read this way. Let me know if you would prefer me to change this to pre-compute the normalised addresses each freemem operation is using. Weirdly, this change resulted in a verifier failure for the fir.declare in the previous test case. Maybe it was previously removed as dead code and now it isn't. Anyway I fixed that too. --- .../lib/Optimizer/Transforms/StackArrays.cpp | 37 +++++++++++-------- flang/test/Transforms/stack-arrays.fir | 20 +++++++++- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp index bdcb8199b790d..2a9d3397e87b0 100644 --- a/flang/lib/Optimizer/Transforms/StackArrays.cpp +++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp @@ -330,6 +330,18 @@ std::optional LatticePoint::get(mlir::Value val) const { return it->second; } +static mlir::Value lookThroughDeclaresAndConverts(mlir::Value value) { + while (mlir::Operation *op = value.getDefiningOp()) { + if (auto declareOp = llvm::dyn_cast(op)) + value = declareOp.getMemref(); + else if (auto convertOp = llvm::dyn_cast(op)) + value = convertOp->getOperand(0); + else + return value; + } + return value; +} + mlir::LogicalResult AllocationAnalysis::visitOperation( mlir::Operation *op, const LatticePoint &before, LatticePoint *after) { LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op @@ -363,10 +375,10 @@ mlir::LogicalResult AllocationAnalysis::visitOperation( mlir::Value operand = op->getOperand(0); // Note: StackArrays is scheduled in the pass pipeline after lowering hlfir - // to fir. Therefore, we only need to handle `fir::DeclareOp`s. - if (auto declareOp = - llvm::dyn_cast_if_present(operand.getDefiningOp())) - operand = declareOp.getMemref(); + // to fir. Therefore, we only need to handle `fir::DeclareOp`s. Also look + // past converts in case the pointer was changed between different pointer + // types. + operand = lookThroughDeclaresAndConverts(operand); std::optional operandState = before.get(operand); if (operandState && *operandState == AllocationState::Allocated) { @@ -535,17 +547,12 @@ AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem, // remove freemem operations llvm::SmallVector erases; - for (mlir::Operation *user : allocmem.getOperation()->getUsers()) { - if (auto declareOp = mlir::dyn_cast_if_present(user)) { - for (mlir::Operation *user : declareOp->getUsers()) { - if (mlir::isa(user)) - erases.push_back(user); - } - } - - if (mlir::isa(user)) - erases.push_back(user); - } + mlir::Operation *parent = allocmem->getParentOp(); + // TODO: this shouldn't need to be re-calculated for every allocmem + parent->walk([&](fir::FreeMemOp freeOp) { + if (lookThroughDeclaresAndConverts(freeOp->getOperand(0)) == allocmem) + erases.push_back(freeOp); + }); // now we are done iterating the users, it is safe to mutate them for (mlir::Operation *erase : erases) diff --git a/flang/test/Transforms/stack-arrays.fir b/flang/test/Transforms/stack-arrays.fir index 66cd2a5aa910b..444136d53e034 100644 --- a/flang/test/Transforms/stack-arrays.fir +++ b/flang/test/Transforms/stack-arrays.fir @@ -379,7 +379,8 @@ func.func @placement_loop_declare() { %3 = arith.addi %c1, %c2 : index // operand is now available %4 = fir.allocmem !fir.array, %3 - %5 = fir.declare %4 {uniq_name = "temp"} : (!fir.heap>) -> !fir.heap> + %shape = fir.shape %3 : (index) -> !fir.shape<1> + %5 = fir.declare %4(%shape) {uniq_name = "temp"} : (!fir.heap>, !fir.shape<1>) -> !fir.heap> // ... fir.freemem %5 : !fir.heap> fir.result %3, %c1_i32 : index, i32 @@ -400,3 +401,20 @@ func.func @placement_loop_declare() { // CHECK-NEXT: } // CHECK-NEXT: return // CHECK-NEXT: } + +// Can we look through fir.convert and fir.declare? +func.func @lookthrough() { + %0 = fir.allocmem !fir.array<42xi32> + %c42 = arith.constant 42 : index + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %1 = fir.declare %0(%shape) {uniq_name = "name"} : (!fir.heap>, !fir.shape<1>) -> !fir.heap> + %2 = fir.convert %1 : (!fir.heap>) -> !fir.ref> + // use the ref so the converts aren't folded + %3 = fir.load %2 : !fir.ref> + %4 = fir.convert %2 : (!fir.ref>) -> !fir.heap> + fir.freemem %4 : !fir.heap> + return +} +// CHECK: func.func @lookthrough() { +// CHECK: fir.alloca !fir.array<42xi32> +// CHECK-NOT: fir.freemem From 67efbd0bf1b2df8a479e09eb2be7db4c3c892f2c Mon Sep 17 00:00:00 2001 From: Ryan Mansfield Date: Wed, 8 Jan 2025 05:07:23 -0500 Subject: [PATCH 9/9] [LLVM] Fix various cl::desc typos and whitespace issues (NFC) (#121955) --- llvm/lib/Analysis/ScalarEvolution.cpp | 2 +- llvm/lib/CodeGen/CodeGenPrepare.cpp | 2 +- llvm/lib/CodeGen/MIRSampleProfile.cpp | 5 +++-- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 2 +- .../lib/CodeGen/MachineBranchProbabilityInfo.cpp | 2 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 2 +- llvm/lib/CodeGen/RegisterCoalescer.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- .../CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 6 +++--- .../CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 6 +++--- llvm/lib/Passes/PassBuilderPipelines.cpp | 8 ++++---- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- .../Transforms/IPO/FunctionSpecialization.cpp | 14 +++++++------- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 2 +- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 +- llvm/lib/Transforms/IPO/SampleProfile.cpp | 11 ++++++----- .../Instrumentation/HWAddressSanitizer.cpp | 2 +- .../Instrumentation/LowerAllowCheckPass.cpp | 2 +- .../Instrumentation/PGOInstrumentation.cpp | 16 ++++++++-------- .../lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2 +- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 2 +- .../lib/Transforms/Scalar/LoopVersioningLICM.cpp | 2 +- .../lib/Transforms/Utils/AssumeBundleBuilder.cpp | 4 ++-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 7 ++++--- 24 files changed, 55 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index b5668a14a4a21..7e18f7c9c1ace 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -226,7 +226,7 @@ static cl::opt RangeIterThreshold( static cl::opt MaxLoopGuardCollectionDepth( "scalar-evolution-max-loop-guard-collection-depth", cl::Hidden, - cl::desc("Maximum depth for recrusive loop guard collection"), cl::init(1)); + cl::desc("Maximum depth for recursive loop guard collection"), cl::init(1)); static cl::opt ClassifyExpressions("scalar-evolution-classify-expressions", diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 5c712e4f007d3..ba1b10ec8b9b1 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -152,7 +152,7 @@ static cl::opt static cl::opt EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), - cl::desc("Enable sinkinig and/cmp into branches.")); + cl::desc("Enable sinking and/cmp into branches.")); static cl::opt DisableStoreExtract( "disable-cgp-store-extract", cl::Hidden, cl::init(false), diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp index 23db09b89599a..9bba50e8e6924 100644 --- a/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -46,8 +46,9 @@ static cl::opt ShowFSBranchProb( cl::desc("Print setting flow sensitive branch probabilities")); static cl::opt FSProfileDebugProbDiffThreshold( "fs-profile-debug-prob-diff-threshold", cl::init(10), - cl::desc("Only show debug message if the branch probility is greater than " - "this value (in percentage).")); + cl::desc( + "Only show debug message if the branch probability is greater than " + "this value (in percentage).")); static cl::opt FSProfileDebugBWThreshold( "fs-profile-debug-bw-threshold", cl::init(10000), diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 0f68313e64f54..05bc4cf646f42 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -149,7 +149,7 @@ static cl::opt JumpInstCost("jump-inst-cost", static cl::opt TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " - "Creates more fallthrough opportunites in " + "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden); diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 56ffffff62240..2e92dd8f257b4 100644 --- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -29,7 +29,7 @@ namespace llvm { cl::opt StaticLikelyProb("static-likely-prob", cl::desc("branch probability threshold in percentage" - "to be considered very likely"), + " to be considered very likely"), cl::init(80), cl::Hidden); cl::opt ProfileLikelyProb( diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index e61dad5cf64d0..b94992c20b119 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -140,7 +140,7 @@ static cl::opt GreedyReverseLocalAssignment( static cl::opt SplitThresholdForRegWithHint( "split-threshold-for-reg-with-hint", cl::desc("The threshold for splitting a virtual register with a hint, in " - "percentate"), + "percentage"), cl::init(75), cl::Hidden); static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 7dac0deed7b7e..8313927dd2aa1 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -113,7 +113,7 @@ static cl::opt LargeIntervalSizeThreshold( static cl::opt LargeIntervalFreqThreshold( "large-interval-freq-threshold", cl::Hidden, - cl::desc("For a large interval, if it is coalesed with other live " + cl::desc("For a large interval, if it is coalesced with other live " "intervals many times more than the threshold, stop its " "coalescing to control the compile time. "), cl::init(256)); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6ea9e68a3a005..da3c834417d6b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -141,7 +141,7 @@ static cl::opt EnableReduceLoadOpStoreWidth( static cl::opt ReduceLoadOpStoreWidthForceNarrowingProfitable( "combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), - cl::desc("DAG combiner force override the narrowing profitable check when" + cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences")); static cl::opt EnableShrinkLoadReplaceStoreWithStore( diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 9e5867c70d7b6..51ee3cc681f05 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -125,9 +125,9 @@ static cl::opt MaxReorderWindow( cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp")); -static cl::opt AvgIPC( - "sched-avg-ipc", cl::Hidden, cl::init(1), - cl::desc("Average inst/cycle whan no target itinerary exists.")); +static cl::opt + AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1), + cl::desc("Average inst/cycle when no target itinerary exists.")); namespace { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 26fc75c0578ec..dff7243b0a99c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -43,9 +43,9 @@ STATISTIC(LoadsClustered, "Number of loads clustered together"); // without a target itinerary. The choice of number here has more to do with // balancing scheduler heuristics than with the actual machine latency. static cl::opt HighLatencyCycles( - "sched-high-latency-cycles", cl::Hidden, cl::init(10), - cl::desc("Roughly estimate the number of cycles that 'long latency'" - "instructions take for targets with no itinerary")); + "sched-high-latency-cycles", cl::Hidden, cl::init(10), + cl::desc("Roughly estimate the number of cycles that 'long latency' " + "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {} diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index d737ea5ab070a..4ec0fb8fc81ea 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -189,9 +189,9 @@ static cl::opt EnableGlobalAnalyses( "enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses")); -static cl::opt - RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, - cl::desc("Run Partial inlinining pass")); +static cl::opt RunPartialInlining("enable-partial-inlining", + cl::init(false), cl::Hidden, + cl::desc("Run Partial inlining pass")); static cl::opt ExtraVectorizerPasses( "extra-vectorizer-passes", cl::init(false), cl::Hidden, @@ -264,7 +264,7 @@ static cl::opt static cl::opt FlattenedProfileUsed( "flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " - "no inline hierachy exists in the profile")); + "no inline hierarchy exists in the profile")); static cl::opt EnableOrderFileInstrumentation( "enable-order-file-instrumentation", cl::init(false), cl::Hidden, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a6bfb489faff2..6b0eb38e7e095 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -94,7 +94,7 @@ static cl::opt BrMergingCcmpBias( static cl::opt WidenShift("x86-widen-shift", cl::init(true), - cl::desc("Replacte narrow shifts with wider shifts."), + cl::desc("Replace narrow shifts with wider shifts."), cl::Hidden); static cl::opt BrMergingLikelyBias( diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 96956481df2f6..449d64d1614ff 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -66,19 +66,19 @@ static cl::opt MaxCodeSizeGrowth( "Maximum codesize growth allowed per function")); static cl::opt MinCodeSizeSavings( - "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc( - "Reject specializations whose codesize savings are less than this" - "much percent of the original function size")); + "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, + cl::desc("Reject specializations whose codesize savings are less than this " + "much percent of the original function size")); static cl::opt MinLatencySavings( "funcspec-min-latency-savings", cl::init(40), cl::Hidden, - cl::desc("Reject specializations whose latency savings are less than this" + cl::desc("Reject specializations whose latency savings are less than this " "much percent of the original function size")); static cl::opt MinInliningBonus( - "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc( - "Reject specializations whose inlining bonus is less than this" - "much percent of the original function size")); + "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, + cl::desc("Reject specializations whose inlining bonus is less than this " + "much percent of the original function size")); static cl::opt SpecializeOnAddress( "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc( diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 16a80e9ebbeaa..78cd249c9c16a 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -105,7 +105,7 @@ static cl::opt ColdCCRelFreq( "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::desc( "Maximum block frequency, expressed as a percentage of caller's " - "entry frequency, for a call site to be considered cold for enabling" + "entry frequency, for a call site to be considered cold for enabling " "coldcc")); /// Is this global variable possibly used by a leak checker as a root? If so, diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index b40ab357670b8..67585e9c80ef4 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -129,7 +129,7 @@ static cl::opt PrintModuleBeforeOptimizations( static cl::opt AlwaysInlineDeviceFunctions( "openmp-opt-inline-device", - cl::desc("Inline all applicible functions on the device."), cl::Hidden, + cl::desc("Inline all applicable functions on the device."), cl::Hidden, cl::init(false)); static cl::opt diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 603beb3b883d7..b978c54ef96fd 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -162,7 +162,7 @@ static cl::opt ProfileSampleBlockAccurate( static cl::opt ProfileAccurateForSymsInList( "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " - "be accurate. It may be overriden by profile-sample-accurate. ")); + "be accurate. It may be overridden by profile-sample-accurate. ")); static cl::opt ProfileMergeInlinee( "sample-profile-merge-inlinee", cl::Hidden, cl::init(true), @@ -193,9 +193,10 @@ static cl::opt ProfileSizeInline( // and inline the hot functions (that are skipped in this pass). static cl::opt DisableSampleLoaderInlining( "disable-sample-loader-inlining", cl::Hidden, cl::init(false), - cl::desc("If true, artifically skip inline transformation in sample-loader " - "pass, and merge (or scale) profiles (as configured by " - "--sample-profile-merge-inlinee).")); + cl::desc( + "If true, artificially skip inline transformation in sample-loader " + "pass, and merge (or scale) profiles (as configured by " + "--sample-profile-merge-inlinee).")); namespace llvm { cl::opt @@ -255,7 +256,7 @@ static cl::opt PrecentMismatchForStalenessError( static cl::opt CallsitePrioritizedInline( "sample-profile-prioritized-inline", cl::Hidden, - cl::desc("Use call site prioritized inlining for sample profile loader." + cl::desc("Use call site prioritized inlining for sample profile loader. " "Currently only CSSPGO is supported.")); static cl::opt UsePreInlinerDecision( diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 530061e3b6bb7..2031728c2f33d 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -192,7 +192,7 @@ static cl::opt cl::Hidden); static cl::opt ClHotPercentileCutoff("hwasan-percentile-cutoff-hot", - cl::desc("Hot percentile cuttoff.")); + cl::desc("Hot percentile cutoff.")); static cl::opt ClRandomSkipRate("hwasan-random-rate", diff --git a/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp b/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp index 2418030dd601a..f27798cfd228c 100644 --- a/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp +++ b/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp @@ -30,7 +30,7 @@ using namespace llvm; static cl::opt HotPercentileCutoff("lower-allow-check-percentile-cutoff-hot", - cl::desc("Hot percentile cuttoff.")); + cl::desc("Hot percentile cutoff.")); static cl::opt RandomRate("lower-allow-check-random-rate", diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 471086ce3a751..db4d62ec36751 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -158,11 +158,11 @@ STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed"); // Command line option to specify the file to read profile from. This is // mainly used for testing. -static cl::opt - PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, - cl::value_desc("filename"), - cl::desc("Specify the path of profile data file. This is" - "mainly for test purpose.")); +static cl::opt PGOTestProfileFile( + "pgo-test-profile-file", cl::init(""), cl::Hidden, + cl::value_desc("filename"), + cl::desc("Specify the path of profile data file. This is " + "mainly for test purpose.")); static cl::opt PGOTestProfileRemappingFile( "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), @@ -186,7 +186,7 @@ static cl::opt MaxNumAnnotations( // to write to the metadata for a single memop intrinsic. static cl::opt MaxNumMemOPAnnotations( "memop-max-annotations", cl::init(4), cl::Hidden, - cl::desc("Max number of preicise value annotations for a single memop" + cl::desc("Max number of precise value annotations for a single memop" "intrinsic")); // Command line option to control appending FunctionHash to the name of a COMDAT @@ -291,13 +291,13 @@ static cl::opt PGOVerifyHotBFI( cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " - "internal option -pass-remakrs-analysis=pgo.")); + "internal option -pass-remarks-analysis=pgo.")); static cl::opt PGOVerifyBFI( "pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " - "internal option -pass-remakrs-analysis=pgo.")); + "internal option -pass-remarks-analysis=pgo.")); static cl::opt PGOVerifyBFIRatio( "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index ba1c2241aea9a..3c82eeda54838 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -128,7 +128,7 @@ static cl::opt static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", - cl::desc("Use loop idiom recognition code size heuristics when compiling" + cl::desc("Use loop idiom recognition code size heuristics when compiling " "with -Os/-Oz"), cl::init(true), cl::Hidden); diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 260cc72c3188d..090348809e571 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -104,7 +104,7 @@ static cl::opt UnrollMaxPercentThresholdBoost( static cl::opt UnrollMaxIterationsCountToAnalyze( "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, - cl::desc("Don't allow loop unrolling to simulate more than this number of" + cl::desc("Don't allow loop unrolling to simulate more than this number of " "iterations when checking full unroll profitability")); static cl::opt UnrollCount( diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index f58dcb51f64fe..6e91c4fa6e230 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -95,7 +95,7 @@ static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable"; /// invariant instructions in a loop. static cl::opt LVInvarThreshold("licm-versioning-invariant-threshold", - cl::desc("LoopVersioningLICM's minimum allowed percentage" + cl::desc("LoopVersioningLICM's minimum allowed percentage " "of possible invariant instructions per loop"), cl::init(25), cl::Hidden); diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp index 1d4f5618b39d0..b499ef839729e 100644 --- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp +++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -28,8 +28,8 @@ using namespace llvm; namespace llvm { cl::opt ShouldPreserveAllAttributes( "assume-preserve-all", cl::init(false), cl::Hidden, - cl::desc("enable preservation of all attrbitues. even those that are " - "unlikely to be usefull")); + cl::desc("enable preservation of all attributes. even those that are " + "unlikely to be useful")); cl::opt EnableKnowledgeRetention( "enable-knowledge-retention", cl::init(false), cl::Hidden, diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 03dc6c1d17446..e367b01a09090 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -96,8 +96,9 @@ using namespace PatternMatch; cl::opt llvm::RequireAndPreserveDomTree( "simplifycfg-require-and-preserve-domtree", cl::Hidden, - cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " - "into preserving DomTree,")); + cl::desc( + "Temporary development switch used to gradually uplift SimplifyCFG " + "into preserving DomTree,")); // Chosen as 2 so as to be cheap, but still to have enough power to fold // a select, so the "clamp" idiom (of a min followed by a max) will be caught. @@ -126,7 +127,7 @@ static cl::opt HoistLoadsStoresWithCondFaulting( static cl::opt HoistLoadsStoresWithCondFaultingThreshold( "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), - cl::desc("Control the maximal conditonal load/store that we are willing " + cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"));